Postgres Long Running Active Queries Send To Slack

June 27, 2019 | June 27, 2019 | admin | 2 Comments

I needed a utility to alert our team whenever a long-running query was active on a production Postgres cluster, so I wrote the following Python script to do just that. As called below, it sends an alert to Slack when an active query has been running for more than 45 minutes. The database, host, user and alert threshold are passed in as command-line parameters; the cron entry below shows how to call it. Hope it helps someone.

CRON CALL:

### postgres long running query check ###
*/15 * * * * /usr/bin/python2.7 /home/postgres/bin/pg_long_running_query.py --database proddb --dbhost proddb01 --user postgres --alert_time_mins 45 >> /home/postgres/pg_long_running_query.log 2>&1

### postgres long running query check ###
*/15 * * * * /usr/bin/python2.7 /home/postgres/bin/pg_long_running_query.py --database proddb --dbhost proddb01 --user postgres --alert_time_mins 45 >> /home/postgres/pg_long_running_query.log 2>&1

CODE:

#!/usr/bin/python2.7

__author__ = "Jason Ralph"


import psycopg2
import psycopg2.extras
import argparse
import urllib


def send_message_to_slack(text):
    """Post ``text`` to the Slack incoming webhook.

    Args:
        text: Message body. It is interpolated with ``%s``, so any object
            with a string representation is accepted.

    Raises:
        ValueError: If Slack answers with a status code other than 200.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure or timeout).
    """
    import requests
    import json

    webhook_url = 'https://hooks.slack.com/services/--redacted--'
    slack_data = {'text': "%s" % text}

    # requests waits indefinitely unless a timeout is given; when this runs
    # unattended (e.g. from cron) a stalled connection would otherwise hang
    # the whole script.  Values are (connect, read) timeouts in seconds.
    response = requests.post(
        webhook_url, data=json.dumps(slack_data),
        headers={'Content-Type': 'application/json'},
        timeout=(5, 30)
    )
    if response.status_code != 200:
        raise ValueError(
            'Request to slack returned an error %s, the response is:\n%s'
            % (response.status_code, response.text)
        )


def get_long_running_queries():
    """Alert Slack about every query running longer than the threshold.

    Parses ``--database``, ``--dbhost``, ``--user`` and
    ``--alert_time_mins`` from the command line, reads ``pg_stat_activity``
    on the target server and sends one Slack message per matching row.
    Rows owned by the ``postgres`` user and rows whose state is ``idle``
    are skipped.

    Raises:
        psycopg2.Error: If connecting or running the query fails.
        ValueError: Propagated from ``send_message_to_slack`` when Slack
            rejects a message.
    """
    parser = argparse.ArgumentParser(description='Check long Running '
                                                 'Queries On Postgres '
                                                 'Databases And Alert')
    parser.add_argument('--database', help='target database')
    parser.add_argument('--dbhost', help='target dbhost')
    parser.add_argument('--user', help='database user')
    # type=int rejects non-numeric input before it reaches the database;
    # required=True turns the former "invalid interval" SQL error for a
    # missing threshold into a clear usage message.
    parser.add_argument('--alert_time_mins', type=int, required=True,
                        help='alert time in mins: e.g 30')
    args = parser.parse_args()

    # Keyword arguments let psycopg2 do the DSN quoting instead of pasting
    # command-line values into a hand-built connection string.
    conn = psycopg2.connect(dbname=args.database, host=args.dbhost,
                            user=args.user, port=5432)

    # The threshold is bound as a query parameter, never interpolated into
    # the SQL text.
    sql = """SELECT pid, usename,
                    now() - pg_stat_activity.query_start AS duration,
                    query, state
             FROM pg_stat_activity
             WHERE (now() - pg_stat_activity.query_start)
                   > %s * interval '1 minute';"""

    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cursor.execute(sql, (args.alert_time_mins,))
        count = 0
        for row in cursor:
            if row['usename'] == 'postgres' or row['state'] == 'idle':
                continue
            count += 1
            msg_items = ['LONG RUNNING QUERY ON HOST: %s\n' % args.dbhost,
                         'PID: %s\n' % row['pid'],
                         'DURATION: %s\n' % row['duration'],
                         'QUERY: %s\n' % row['query'],
                         'STATE: %s\n' % row['state'],
                         'USER: %s\n' % row['usename'],
                         'COUNT: %s\n' % count]
            send_message_to_slack(''.join(msg_items))
    finally:
        # Close the connection even when the query or a Slack post raises.
        conn.close()

def main():
    """Script entry point: run the long-running-query check once."""
    get_long_running_queries()


# Only run the check when executed as a script, not when imported.
if __name__ == '__main__':
    main()

#!/usr/bin/python2.7

__author__ = "Jason Ralph"

import psycopg2

import psycopg2.extras

import argparse

import urllib

def send_message_to_slack(text):
    """Post ``text`` to the Slack incoming webhook.

    Args:
        text: Message body. It is interpolated with ``%s``, so any object
            with a string representation is accepted.

    Raises:
        ValueError: If Slack answers with a status code other than 200.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure or timeout).
    """
    import requests
    import json

    webhook_url = 'https://hooks.slack.com/services/--redacted--'
    slack_data = {'text': "%s" % text}

    # requests waits indefinitely unless a timeout is given; when this runs
    # unattended (e.g. from cron) a stalled connection would otherwise hang
    # the whole script.  Values are (connect, read) timeouts in seconds.
    response = requests.post(
        webhook_url, data=json.dumps(slack_data),
        headers={'Content-Type': 'application/json'},
        timeout=(5, 30)
    )
    if response.status_code != 200:
        raise ValueError(
            'Request to slack returned an error %s, the response is:\n%s'
            % (response.status_code, response.text)
        )

def get_long_running_queries():
    """Alert Slack about every query running longer than the threshold.

    Parses ``--database``, ``--dbhost``, ``--user`` and
    ``--alert_time_mins`` from the command line, reads ``pg_stat_activity``
    on the target server and sends one Slack message per matching row.
    Rows owned by the ``postgres`` user and rows whose state is ``idle``
    are skipped.

    Raises:
        psycopg2.Error: If connecting or running the query fails.
        ValueError: Propagated from ``send_message_to_slack`` when Slack
            rejects a message.
    """
    parser = argparse.ArgumentParser(description='Check long Running '
                                                 'Queries On Postgres '
                                                 'Databases And Alert')
    parser.add_argument('--database', help='target database')
    parser.add_argument('--dbhost', help='target dbhost')
    parser.add_argument('--user', help='database user')
    # type=int rejects non-numeric input before it reaches the database;
    # required=True turns the former "invalid interval" SQL error for a
    # missing threshold into a clear usage message.
    parser.add_argument('--alert_time_mins', type=int, required=True,
                        help='alert time in mins: e.g 30')
    args = parser.parse_args()

    # Keyword arguments let psycopg2 do the DSN quoting instead of pasting
    # command-line values into a hand-built connection string.
    conn = psycopg2.connect(dbname=args.database, host=args.dbhost,
                            user=args.user, port=5432)

    # The threshold is bound as a query parameter, never interpolated into
    # the SQL text.
    sql = """SELECT pid, usename,
                    now() - pg_stat_activity.query_start AS duration,
                    query, state
             FROM pg_stat_activity
             WHERE (now() - pg_stat_activity.query_start)
                   > %s * interval '1 minute';"""

    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cursor.execute(sql, (args.alert_time_mins,))
        count = 0
        for row in cursor:
            if row['usename'] == 'postgres' or row['state'] == 'idle':
                continue
            count += 1
            msg_items = ['LONG RUNNING QUERY ON HOST: %s\n' % args.dbhost,
                         'PID: %s\n' % row['pid'],
                         'DURATION: %s\n' % row['duration'],
                         'QUERY: %s\n' % row['query'],
                         'STATE: %s\n' % row['state'],
                         'USER: %s\n' % row['usename'],
                         'COUNT: %s\n' % count]
            send_message_to_slack(''.join(msg_items))
    finally:
        # Close the connection even when the query or a Slack post raises.
        conn.close()

def main():
    """Script entry point: run the long-running-query check once."""
    get_long_running_queries()


# Only run the check when executed as a script, not when imported.
if __name__ == '__main__':
    main()

SLACK MESSAGE:

LONG RUNNING QUERY ON HOST: proddb01
PID: 30270
DURATION: 0:55:02.748624
QUERY: SELECT --redacted--
STATE: active
USER: dbuser
COUNT: 1

LONG RUNNING QUERY ON HOST: proddb01

PID: 30270

DURATION: 0:55:02.748624

QUERY: SELECT --redacted--

STATE: active

USER: dbuser

COUNT: 1

Python Function Execute Subprocess With Timeout

June 23, 2019 | October 15, 2020 | admin | Leave a comment

I have a project that rsyncs data from an RPM repository to maintain a local copy of that repo. The issue I faced was that the remote mirror would sometimes stop the rsync because of an overloaded network or other unforeseen issues. I wanted to use rsync's hashing algorithm so that a restarted transfer picks up right where it left off, so I wrote a function to do this. If the 900-second timeout was hit, it usually meant there was an issue with the transfer. I also want to state here that I observed rsync stop serving on many mirrors, so it was not just an issue with the TCP network. I use this in production, and it logs each iteration or restart. The function below also kills the current rsync so that multiple copies are not running at the same time. I also wanted to attempt the rsync at most 5 times on error or timeout, so I use a while loop here.

Here are the individual rsync commands in the INI configuration.

[rsync_cmds]
rsync01 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/
rsync02 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/
rsync03 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/centosplus/x86_64/ 7/centosplus/x86_64/
rsync04 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/extras/x86_64/ 7/extras/x86_64

[rsync_cmds]

rsync01 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/

rsync02 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/

rsync03 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/centosplus/x86_64/ 7/centosplus/x86_64/

rsync04 = /usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/extras/x86_64/ 7/extras/x86_64

Here is how I call the execute_jobs_timeout() function:

# Option name -> rsync command line, read from the [rsync_cmds] section.
rsync_commands = dict(config.items('rsync_cmds'))


def rsync_data():
    """Run every configured rsync command, ordered by its option name."""
    for option in sorted(rsync_commands):
        execute_jobs_timeout(rsync_commands[option])

# Option name -> rsync command line, read from the [rsync_cmds] section.
rsync_commands = dict(config.items('rsync_cmds'))


def rsync_data():
    """Run every configured rsync command, ordered by its option name."""
    for option in sorted(rsync_commands):
        execute_jobs_timeout(rsync_commands[option])

The function:

def execute_jobs_timeout(cmd, max_attempts=5, timeout=900):
    """Run ``cmd``, restarting it when it times out or exits non-zero.

    Args:
        cmd: Command line as a single string; it is split with
            ``shlex.split`` and executed without a shell.
        max_attempts: Maximum number of times the command is started.
        timeout: Seconds to wait for one attempt before killing it.

    Returns:
        True if an attempt exited with status 0, False once every attempt
        has failed or timed out.
    """
    for attempt in range(1, max_attempts + 1):
        proc = subprocess.Popen(shlex.split(cmd),
                                start_new_session=True)
        try:
            logger.info('Start Command: [%s]' % sanitize(cmd))
            # No pipes are requested from Popen, so both values are None;
            # the child's output goes to the inherited stdout/stderr.
            stdout_data, stderr_data = proc.communicate(timeout=timeout)
        except subprocess.SubprocessError as e:
            # TimeoutExpired is a SubprocessError, so one clause covers both.
            _kill_process_group(proc)
            logger.warning('[%s]' % e)
            if attempt < max_attempts:
                logger.info('Restarting [%s]' % sanitize(cmd))
            continue
        if proc.returncode == 0:
            logger.info('Success: [%s]' % sanitize(cmd))
            return True
        logger.critical(
            "%r failed, status code %s stdout %r stderr %r" % (
                sanitize(cmd), proc.returncode,
                stdout_data, stderr_data))
    logger.critical('Execute Jobs Failed After %d Iterations.' % max_attempts)
    return False


def _kill_process_group(proc):
    """SIGKILL the process group of ``proc`` and reap the child."""
    try:
        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
    except ProcessLookupError:
        # The child went away between the timeout firing and the kill.
        pass
    # Collect the exit status so the killed child is not left as a zombie.
    proc.wait()

def execute_jobs_timeout(cmd, max_attempts=5, timeout=900):
    """Run ``cmd``, restarting it when it times out or exits non-zero.

    Args:
        cmd: Command line as a single string; it is split with
            ``shlex.split`` and executed without a shell.
        max_attempts: Maximum number of times the command is started.
        timeout: Seconds to wait for one attempt before killing it.

    Returns:
        True if an attempt exited with status 0, False once every attempt
        has failed or timed out.
    """
    for attempt in range(1, max_attempts + 1):
        proc = subprocess.Popen(shlex.split(cmd),
                                start_new_session=True)
        try:
            logger.info('Start Command: [%s]' % sanitize(cmd))
            # No pipes are requested from Popen, so both values are None;
            # the child's output goes to the inherited stdout/stderr.
            stdout_data, stderr_data = proc.communicate(timeout=timeout)
        except subprocess.SubprocessError as e:
            # TimeoutExpired is a SubprocessError, so one clause covers both.
            _kill_process_group(proc)
            logger.warning('[%s]' % e)
            if attempt < max_attempts:
                logger.info('Restarting [%s]' % sanitize(cmd))
            continue
        if proc.returncode == 0:
            logger.info('Success: [%s]' % sanitize(cmd))
            return True
        logger.critical(
            "%r failed, status code %s stdout %r stderr %r" % (
                sanitize(cmd), proc.returncode,
                stdout_data, stderr_data))
    logger.critical('Execute Jobs Failed After %d Iterations.' % max_attempts)
    return False


def _kill_process_group(proc):
    """SIGKILL the process group of ``proc`` and reap the child."""
    try:
        os.killpg(os.getpgid(proc.pid), signal.SIGKILL)
    except ProcessLookupError:
        # The child went away between the timeout firing and the kill.
        pass
    # Collect the exit status so the killed child is not left as a zombie.
    proc.wait()

Log Snippet showing each command executing:

2019-05-25 03:15:03,872 - __main__ - INFO - Restarting [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/] - devdbadmin
2019-05-25 03:15:03,875 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/] - devdbadmin
2019-05-25 03:27:53,801 - __main__ - INFO - Success: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/] - devdbadmin
2019-05-25 03:27:53,821 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin
2019-05-25 03:42:53,821 - __main__ - WARNING - [Command '['/usr/local/bin/rsync', '-a', 'rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/', '7/updates/x86_64/']' timed out after 899.9999316609465 seconds] - devdbadmin
2019-05-25 03:42:53,822 - __main__ - INFO - Restarting [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin
2019-05-25 03:42:53,850 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin
2019-05-25 03:57:53,851 - __main__ - WARNING - [Command '['/usr/local/bin/rsync', '-a', 'rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/', '7/updates/x86_64/']' timed out after 899.9999369028956 seconds] - devdbadmin
2019-05-25 03:57:53,852 - __main__ - INFO - Restarting [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin
2019-05-25 03:57:53,854 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin
2019-05-25 04:01:28,522 - __main__ - INFO - Success: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin
2019-05-25 04:01:28,524 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/centosplus/x86_64/ 7/centosplus/x86_64/] - devdbadmin
2019-05-25 04:16:28,527 - __main__ - WARNING - [Command '['/usr/local/bin/rsync', '-a', 'rsync://mirror.cogentco.com/CentOS/7/centosplus/x86_64/', '7/centosplus/x86_64/']' timed out after 899.9999288369436 seconds] - devdbadmin

2019-05-25 03:15:03,872 - __main__ - INFO - Restarting [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/] - devdbadmin

2019-05-25 03:15:03,875 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/] - devdbadmin

2019-05-25 03:27:53,801 - __main__ - INFO - Success: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/os/x86_64/ 7/x86_64/] - devdbadmin

2019-05-25 03:27:53,821 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin

2019-05-25 03:42:53,821 - __main__ - WARNING - [Command '['/usr/local/bin/rsync', '-a', 'rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/', '7/updates/x86_64/']' timed out after 899.9999316609465 seconds] - devdbadmin

2019-05-25 03:42:53,822 - __main__ - INFO - Restarting [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin

2019-05-25 03:42:53,850 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin

2019-05-25 03:57:53,851 - __main__ - WARNING - [Command '['/usr/local/bin/rsync', '-a', 'rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/', '7/updates/x86_64/']' timed out after 899.9999369028956 seconds] - devdbadmin

2019-05-25 03:57:53,852 - __main__ - INFO - Restarting [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin

2019-05-25 03:57:53,854 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin

2019-05-25 04:01:28,522 - __main__ - INFO - Success: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/updates/x86_64/ 7/updates/x86_64/] - devdbadmin

2019-05-25 04:01:28,524 - __main__ - INFO - Start Command: [/usr/local/bin/rsync -a rsync://mirror.cogentco.com/CentOS/7/centosplus/x86_64/ 7/centosplus/x86_64/] - devdbadmin

2019-05-25 04:16:28,527 - __main__ - WARNING - [Command '['/usr/local/bin/rsync', '-a', 'rsync://mirror.cogentco.com/CentOS/7/centosplus/x86_64/', '7/centosplus/x86_64/']' timed out after 899.9999288369436 seconds] - devdbadmin

CENTOS6 Postgres pg_upgrade 9 to 11 – In Place – Link – No Copy – Limited Disk Space

April 17, 2019 | November 13, 2019 | admin | 1 Comment

I wanted to share my experience upgrading a Postgres database server from major version 9 to 11. I am showing the steps that I took to get many servers in dev and production upgraded with limited disk space (not enough space to copy the data). I am hoping this will help others with the problems I faced when testing this procedure. Using the --link parameter has drawbacks, as noted in the documentation; however, we perform full VM backups of each server, so we can always restore from backup if the upgrade fails, and we will not need to start the pg9.3 database again.

https://www.postgresql.org/docs/11/pgupgrade.html

-k --link

Use hard links instead of copying files to the new cluster. If you ran pg_upgrade with --link, the data files are shared between the old and new cluster. If you started the new cluster, the new server has written to those shared files and it is unsafe to use the old cluster.

Before we get started, make a backup of the files pg_hba.conf and postgresql.conf; you will need them later to reconstruct the pg11 configs.

[root@jr-sandbox ~]# cp /data1/data93/pg_hba.conf /root/
[root@jr-sandbox ~]# cp /data1/data93/postgresql.conf /root/

[root@jr-sandbox ~]# cp /data1/data93/pg_hba.conf /root/
[root@jr-sandbox ~]# cp /data1/data93/postgresql.conf /root/

Use WGET to grab the RPMS from https://yum.postgresql.org

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-11.2-2PGDG.rhel6.x86_64.rpm
[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-contrib-11.2-2PGDG.rhel6.x86_64.rpm
[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-debuginfo-11.2-2PGDG.rhel6.x86_64.rpm
[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-devel-11.2-2PGDG.rhel6.x86_64.rpm
[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-docs-11.2-2PGDG.rhel6.x86_64.rpm
[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-libs-11.2-2PGDG.rhel6.x86_64.rpm
[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-server-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-contrib-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-debuginfo-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-devel-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-docs-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-libs-11.2-2PGDG.rhel6.x86_64.rpm

[root@jr-sandbox pg_11]# wget https://yum.postgresql.org/11/redhat/rhel-6-x86_64/postgresql11-server-11.2-2PGDG.rhel6.x86_64.rpm

Install the RPMS for postgres11 that we just downloaded

[root@jr-sandbox pg_11]# rpm -ivh postgresql11-*
warning: postgresql11-11.2-2PGDG.rhel6.x86_64.rpm: Header V4 DSA/SHA1 Signature, key ID 442df0f8: NOKEY
Preparing...                ########################################### [100%]
   1:postgresql11-libs      ########################################### [ 14%]
   2:postgresql11           ########################################### [ 29%]
   3:postgresql11-contrib   ########################################### [ 43%]
   4:postgresql11-devel     ########################################### [ 57%]
   5:postgresql11-server    ########################################### [ 71%]
   6:postgresql11-docs      ########################################### [ 86%]
   7:postgresql11-debuginfo ########################################### [100%]

[root@jr-sandbox pg_11]# rpm -ivh postgresql11-*

warning: postgresql11-11.2-2PGDG.rhel6.x86_64.rpm: Header V4 DSA/SHA1 Signature, key ID 442df0f8: NOKEY

Preparing... ########################################### [100%]

1:postgresql11-libs ########################################### [ 14%]

2:postgresql11 ########################################### [ 29%]

3:postgresql11-contrib ########################################### [ 43%]

4:postgresql11-devel ########################################### [ 57%]

5:postgresql11-server ########################################### [ 71%]

6:postgresql11-docs ########################################### [ 86%]

7:postgresql11-debuginfo ########################################### [100%]

We will create the data location for postgres11, where the files will be hard-linked and not copied. You can see the tablespace disk locations and the index locations from the pg9.3 install. It's important to create the new pg11 data directory on the same filesystem, since we will be using the --link parameter and it uses hard links, which cannot span filesystems.

[root@jr-sandbox ~]# cd /data1/
[root@jr-sandbox data1]# ls -ltr
total 12
drwx------  3 postgres postgres 4096 Apr 16 22:57 ts_index1
drwx------  3 postgres postgres 4096 Apr 16 22:58 ts_data2
drwx------ 16 postgres postgres 4096 Apr 16 23:02 data93
[root@jr-sandbox data1]# mkdir data11
[root@jr-sandbox data1]# chown -R postgres:postgres data11/

[root@jr-sandbox ~]# cd /data1/

[root@jr-sandbox data1]# ls -ltr

total 12

drwx------ 3 postgres postgres 4096 Apr 16 22:57 ts_index1

drwx------ 3 postgres postgres 4096 Apr 16 22:58 ts_data2

drwx------ 16 postgres postgres 4096 Apr 16 23:02 data93

[root@jr-sandbox data1]# mkdir data11

[root@jr-sandbox data1]# chown -R postgres:postgres data11/

We will need to init a postgres database in our new location on disk data11.

[root@jr-sandbox ~]# su - postgres
-bash-4.1$ /usr/pgsql-11/bin/initdb -D /data1/data11
The files belonging to this database system will be owned by user "postgres".
This user must also own the server process.

The database cluster will be initialized with locale "en_US.UTF-8".
The default database encoding has accordingly been set to "UTF8".
The default text search configuration will be set to "english".

Data page checksums are disabled.

fixing permissions on existing directory /data1/data11 ... ok
creating subdirectories ... ok
selecting default max_connections ... 100
selecting default shared_buffers ... 128MB
selecting dynamic shared memory implementation ... posix
creating configuration files ... ok
running bootstrap script ... ok
performing post-bootstrap initialization ... ok
syncing data to disk ... ok

WARNING: enabling "trust" authentication for local connections
You can change this by editing pg_hba.conf or using the option -A, or
--auth-local and --auth-host, the next time you run initdb.

Success. You can now start the database server using:

    /usr/pgsql-11/bin/pg_ctl -D /data1/data11 -l logfile start

-bash-4.1$

[root@jr-sandbox ~]# su - postgres

-bash-4.1$ /usr/pgsql-11/bin/initdb -D /data1/data11

The files belonging to this database system will be owned by user "postgres".

This user must also own the server process.

The database cluster will be initialized with locale "en_US.UTF-8".

The default database encoding has accordingly been set to "UTF8".

The default text search configuration will be set to "english".

Data page checksums are disabled.

fixing permissions on existing directory /data1/data11 ... ok

creating subdirectories ... ok

selecting default max_connections ... 100

selecting default shared_buffers ... 128MB

selecting dynamic shared memory implementation ... posix

creating configuration files ... ok

running bootstrap script ... ok

performing post-bootstrap initialization ... ok

syncing data to disk ... ok

WARNING: enabling "trust" authentication for local connections

You can change this by editing pg_hba.conf or using the option -A, or

--auth-local and --auth-host, the next time you run initdb.

Success. You can now start the database server using:

/usr/pgsql-11/bin/pg_ctl -D /data1/data11 -l logfile start

-bash-4.1$

Now we are ready to stop pg9.3 and check pg_upgrade compatibility. pg_upgrade ships with a --check argument that checks the compatibility of the clusters to be sure the upgrade will work before changing any files. Let's stop pg9.3 and run pg_upgrade with the --check parameter.

[root@jr-sandbox ~]# /etc/init.d/postgresql-9.3 stop
Stopping postgresql-9.3 service:                           [  OK  ]

[root@jr-sandbox ~]# su - postgres

-bash-4.1$ /usr/pgsql-11/bin/pg_upgrade --link --old-bindir=/usr/pgsql-9.3/bin/ --new-bindir=/usr/pgsql-11/bin/ --old-datadir=/data1/data93/ --new-datadir=/data1/data11/ --check
Performing Consistency Checks
-----------------------------
Checking cluster versions                                   ok
Checking database user is the install user                  ok
Checking database connection settings                       ok
Checking for prepared transactions                          ok
Checking for reg* data types in user tables                 ok
Checking for contrib/isn with bigint-passing mismatch       ok
Checking for invalid "unknown" user columns                 ok
Checking for hash indexes                                   ok
Checking for roles starting with "pg_"                      ok
Checking for incompatible "line" data type                  ok
Checking for presence of required libraries                 ok
Checking database user is the install user                  ok
Checking for prepared transactions                          ok

*Clusters are compatible*

[root@jr-sandbox ~]# /etc/init.d/postgresql-9.3 stop

Stopping postgresql-9.3 service: [ OK ]

[root@jr-sandbox ~]# su - postgres

-bash-4.1$ /usr/pgsql-11/bin/pg_upgrade --link --old-bindir=/usr/pgsql-9.3/bin/ --new-bindir=/usr/pgsql-11/bin/ --old-datadir=/data1/data93/ --new-datadir=/data1/data11/ --check

Performing Consistency Checks

-----------------------------

Checking cluster versions ok

Checking database user is the install user ok

Checking database connection settings ok

Checking for prepared transactions ok

Checking for reg* data types in user tables ok

Checking for contrib/isn with bigint-passing mismatch ok

Checking for invalid "unknown" user columns ok

Checking for hash indexes ok

Checking for roles starting with "pg_" ok

Checking for incompatible "line" data type ok

Checking for presence of required libraries ok

Checking database user is the install user ok

Checking for prepared transactions ok

*Clusters are compatible*

OK, the checks have passed and the clusters are ready for upgrade. Let's run this without the --check parameter and upgrade postgres.

[root@jr-sandbox ~]# su - postgres
-bash-4.1$ /usr/pgsql-11/bin/pg_upgrade --link --old-bindir=/usr/pgsql-9.3/bin/ --new-bindir=/usr/pgsql-11/bin/ --old-datadir=/data1/data93/ --new-datadir=/data1/data11/
Performing Consistency Checks
-----------------------------
Checking cluster versions                                   ok
Checking database user is the install user                  ok
Checking database connection settings                       ok
Checking for prepared transactions                          ok
Checking for reg* data types in user tables                 ok
Checking for contrib/isn with bigint-passing mismatch       ok
Checking for invalid "unknown" user columns                 ok
Checking for roles starting with "pg_"                      ok
Checking for incompatible "line" data type                  ok
Creating dump of global objects                             ok
Creating dump of database schemas
                                                            ok
Checking for presence of required libraries                 ok
Checking database user is the install user                  ok
Checking for prepared transactions                          ok

If pg_upgrade fails after this point, you must re-initdb the
new cluster before continuing.

Performing Upgrade
------------------
Analyzing all rows in the new cluster                       ok
Freezing all rows in the new cluster                        ok
Deleting files from new pg_xact                             ok
Copying old pg_clog to new server                           ok
Setting next transaction ID and epoch for new cluster       ok
Deleting files from new pg_multixact/offsets                ok
Copying old pg_multixact/offsets to new server              ok
Deleting files from new pg_multixact/members                ok
Copying old pg_multixact/members to new server              ok
Setting next multixact ID and offset for new cluster        ok
Resetting WAL archives                                      ok
Setting frozenxid and minmxid counters in new cluster       ok
Restoring global objects in the new cluster                 ok
Restoring database schemas in the new cluster
                                                            ok
Adding ".old" suffix to old global/pg_control               ok

If you want to start the old cluster, you will need to remove
the ".old" suffix from /data1/data93/global/pg_control.old.
Because "link" mode was used, the old cluster cannot be safely
started once the new cluster has been started.

Linking user relation files
                                                            ok
Setting next OID for new cluster                            ok
Sync data directory to disk                                 ok
Creating script to analyze new cluster                      ok
Creating script to delete old cluster                       ok
Checking for hash indexes                                   ok

Upgrade Complete
----------------
Optimizer statistics are not transferred by pg_upgrade so,
once you start the new server, consider running:
    ./analyze_new_cluster.sh

Running this script will delete the old cluster's data files:
    ./delete_old_cluster.sh
-bash-4.1$

[root@jr-sandbox ~]# su - postgres

-bash-4.1$ /usr/pgsql-11/bin/pg_upgrade --link --old-bindir=/usr/pgsql-9.3/bin/ --new-bindir=/usr/pgsql-11/bin/ --old-datadir=/data1/data93/ --new-datadir=/data1/data11/

Performing Consistency Checks

-----------------------------

Checking cluster versions ok

Checking database user is the install user ok

Checking database connection settings ok

Checking for prepared transactions ok

Checking for reg* data types in user tables ok

Checking for contrib/isn with bigint-passing mismatch ok

Checking for invalid "unknown" user columns ok

Checking for roles starting with "pg_" ok

Checking for incompatible "line" data type ok

Creating dump of global objects ok

Creating dump of database schemas

Checking for presence of required libraries ok

Checking database user is the install user ok

Checking for prepared transactions ok

If pg_upgrade fails after this point, you must re-initdb the

new cluster before continuing.

Performing Upgrade

------------------

Analyzing all rows in the new cluster ok

Freezing all rows in the new cluster ok

Deleting files from new pg_xact ok

Copying old pg_clog to new server ok

Setting next transaction ID and epoch for new cluster ok

Deleting files from new pg_multixact/offsets ok

Copying old pg_multixact/offsets to new server ok

Deleting files from new pg_multixact/members ok

Copying old pg_multixact/members to new server ok

Setting next multixact ID and offset for new cluster ok

Resetting WAL archives ok

Setting frozenxid and minmxid counters in new cluster ok

Restoring global objects in the new cluster ok

Restoring database schemas in the new cluster

Adding ".old" suffix to old global/pg_control ok

If you want to start the old cluster, you will need to remove

the ".old" suffix from /data1/data93/global/pg_control.old.

Because "link" mode was used, the old cluster cannot be safely

started once the new cluster has been started.

Linking user relation files

Setting next OID for new cluster ok

Sync data directory to disk ok

Creating script to analyze new cluster ok

Creating script to delete old cluster ok

Checking for hash indexes ok

Upgrade Complete

----------------

Optimizer statistics are not transferred by pg_upgrade so,

once you start the new server, consider running:

./analyze_new_cluster.sh

Running this script will delete the old cluster's data files:

./delete_old_cluster.sh

-bash-4.1$

OK, pg_upgrade completed successfully and has generated 2 scripts: one to analyze the new pg11 cluster to get stats for the query planner and vacuum, and the other to clean up and remove the old pg9.3 locations on disk. Let's start pg11. We will need to create an override file to tell pg11 where the data11 data lives; then we should be able to start postgres, check some things and verify our upgrade.

[root@jr-sandbox ~]# cd /etc/sysconfig/pgsql/
[root@jr-sandbox pgsql]# cp postgresql-9.3 postgresql-11
[root@jr-sandbox pgsql]# vim postgresql-11 
[root@jr-sandbox pgsql]# cat postgresql-11 
PGDATA=/data1/data11
PGLOG=/data1/data11/pgstartup.log

[root@jr-sandbox pgsql]# /etc/init.d/postgresql-11 start
Starting postgresql-11 service:                            [  OK  ]

[root@jr-sandbox ~]# cd /etc/sysconfig/pgsql/

[root@jr-sandbox pgsql]# cp postgresql-9.3 postgresql-11

[root@jr-sandbox pgsql]# vim postgresql-11

[root@jr-sandbox pgsql]# cat postgresql-11

PGDATA=/data1/data11

PGLOG=/data1/data11/pgstartup.log

[root@jr-sandbox pgsql]# /etc/init.d/postgresql-11 start

Starting postgresql-11 service: [ OK ]

[root@jr-sandbox pgsql]# su - postgres
-bash-4.1$ ps -ef| grep postgres| head -n 1
postgres 31047     1  0 23:30 ?        00:00:00 /usr/pgsql-11/bin/postmaster -D /data1/data11
-bash-4.1$ psql 
psql (11.2)
Type "help" for help.

postgres=# select spcname
      ,pg_tablespace_location(oid) 
from   pg_tablespace;
  spcname   | pg_tablespace_location 
------------+------------------------
 pg_default | 
 pg_global  | 
 index1     | /data1/ts_index1
 data2      | /data1/ts_data2
(4 rows)

[root@jr-sandbox pgsql]# su - postgres

-bash-4.1$ ps -ef| grep postgres| head -n 1

postgres 31047 1 0 23:30 ? 00:00:00 /usr/pgsql-11/bin/postmaster -D /data1/data11

-bash-4.1$ psql

psql (11.2)

Type "help" for help.

postgres=# select spcname

,pg_tablespace_location(oid)

from pg_tablespace;

spcname | pg_tablespace_location

------------+------------------------

pg_default |

pg_global |

index1 | /data1/ts_index1

data2 | /data1/ts_data2

(4 rows)

OK, we can see that pg11 is running, and we could run the generated scripts to clean up, but first let's take a look at the data and index directories to see what the upgrade produced.

[root@jr-sandbox ~]# cd /data1/
[root@jr-sandbox data1]# ls
data11  data93  ts_data2  ts_index1
[root@jr-sandbox data1]# cd data11/
[root@jr-sandbox data11]# ls -l
total 132
drwx------ 5 postgres postgres  4096 Apr 16 23:19 base
-rw------- 1 postgres postgres    30 Apr 16 23:30 current_logfiles
drwx------ 2 postgres postgres  4096 Apr 16 23:32 global
drwx------ 2 postgres postgres  4096 Apr 16 23:10 log
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_commit_ts
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_dynshmem
-rw------- 1 postgres postgres  4513 Apr 16 23:09 pg_hba.conf
-rw------- 1 postgres postgres  1636 Apr 16 23:09 pg_ident.conf
drwx------ 4 postgres postgres  4096 Apr 16 23:35 pg_logical
drwx------ 4 postgres postgres  4096 Apr 16 23:19 pg_multixact
drwx------ 2 postgres postgres  4096 Apr 16 23:30 pg_notify
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_replslot
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_serial
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_snapshots
-rw------- 1 postgres postgres   469 Apr 16 23:30 pgstartup.log
drwx------ 2 postgres postgres  4096 Apr 16 23:30 pg_stat
drwx------ 2 postgres postgres  4096 Apr 16 23:45 pg_stat_tmp
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_subtrans
drwx------ 2 postgres postgres  4096 Apr 16 23:19 pg_tblspc
drwx------ 2 postgres postgres  4096 Apr 16 23:09 pg_twophase
-rw------- 1 postgres postgres     3 Apr 16 23:09 PG_VERSION
drwx------ 3 postgres postgres  4096 Apr 16 23:19 pg_wal
drwx------ 2 postgres postgres  4096 Apr 16 23:19 pg_xact
-rw------- 1 postgres postgres    88 Apr 16 23:09 postgresql.auto.conf
-rw------- 1 postgres postgres 23863 Apr 16 23:09 postgresql.conf
-rw------- 1 postgres postgres    48 Apr 16 23:30 postmaster.opts
-rw------- 1 postgres postgres    95 Apr 16 23:30 postmaster.pid
[root@jr-sandbox data11]# cd ../ts_index1/
[root@jr-sandbox ts_index1]# ls -l
total 8
drwx------ 2 postgres postgres 4096 Apr 16 23:19 PG_11_201809051
drwx------ 2 postgres postgres 4096 Apr 16 22:57 PG_9.3_201306121
[root@jr-sandbox ts_index1]# cd ../ts_data2/
You have mail in /var/spool/mail/root
[root@jr-sandbox ts_data2]# ls -l
total 8
drwx------ 2 postgres postgres 4096 Apr 16 23:19 PG_11_201809051
drwx------ 2 postgres postgres 4096 Apr 16 22:58 PG_9.3_201306121
<strong>

[root@jr-sandbox ~]# cd /data1/

[root@jr-sandbox data1]# ls

data11 data93 ts_data2 ts_index1

[root@jr-sandbox data1]# cd data11/

[root@jr-sandbox data11]# ls -l

total 132

drwx------ 5 postgres postgres 4096 Apr 16 23:19 base

-rw------- 1 postgres postgres 30 Apr 16 23:30 current_logfiles

drwx------ 2 postgres postgres 4096 Apr 16 23:32 global

drwx------ 2 postgres postgres 4096 Apr 16 23:10 log

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_commit_ts

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_dynshmem

-rw------- 1 postgres postgres 4513 Apr 16 23:09 pg_hba.conf

-rw------- 1 postgres postgres 1636 Apr 16 23:09 pg_ident.conf

drwx------ 4 postgres postgres 4096 Apr 16 23:35 pg_logical

drwx------ 4 postgres postgres 4096 Apr 16 23:19 pg_multixact

drwx------ 2 postgres postgres 4096 Apr 16 23:30 pg_notify

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_replslot

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_serial

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_snapshots

-rw------- 1 postgres postgres 469 Apr 16 23:30 pgstartup.log

drwx------ 2 postgres postgres 4096 Apr 16 23:30 pg_stat

drwx------ 2 postgres postgres 4096 Apr 16 23:45 pg_stat_tmp

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_subtrans

drwx------ 2 postgres postgres 4096 Apr 16 23:19 pg_tblspc

drwx------ 2 postgres postgres 4096 Apr 16 23:09 pg_twophase

-rw------- 1 postgres postgres 3 Apr 16 23:09 PG_VERSION

drwx------ 3 postgres postgres 4096 Apr 16 23:19 pg_wal

drwx------ 2 postgres postgres 4096 Apr 16 23:19 pg_xact

-rw------- 1 postgres postgres 88 Apr 16 23:09 postgresql.auto.conf

-rw------- 1 postgres postgres 23863 Apr 16 23:09 postgresql.conf

-rw------- 1 postgres postgres 48 Apr 16 23:30 postmaster.opts

-rw------- 1 postgres postgres 95 Apr 16 23:30 postmaster.pid

[root@jr-sandbox data11]# cd ../ts_index1/

[root@jr-sandbox ts_index1]# ls -l

total 8

drwx------ 2 postgres postgres 4096 Apr 16 23:19 PG_11_201809051

drwx------ 2 postgres postgres 4096 Apr 16 22:57 PG_9.3_201306121

[root@jr-sandbox ts_index1]# cd ../ts_data2/

You have mail in /var/spool/mail/root

[root@jr-sandbox ts_data2]# ls -l

total 8

drwx------ 2 postgres postgres 4096 Apr 16 23:19 PG_11_201809051

drwx------ 2 postgres postgres 4096 Apr 16 22:58 PG_9.3_201306121

We can view the shell scripts that pg_upgrade produced, which clean up the old pg9.3 references and run the analyze vacuums.

[root@jr-sandbox ~]# su - postgres
-bash-4.1$ ls
11  9.3  analyze_new_cluster.sh  delete_old_cluster.sh
-bash-4.1$ cat delete_old_cluster.sh 
#!/bin/sh

rm -rf '/data1/data93'
rm -rf '/data1/ts_index1/PG_9.3_201306121'
rm -rf '/data1/ts_data2/PG_9.3_201306121'
-bash-4.1$ cat analyze_new_cluster.sh 
#!/bin/sh

echo 'This script will generate minimal optimizer statistics rapidly'
echo 'so your system is usable, and then gather statistics twice more'
echo 'with increasing accuracy.  When it is done, your system will'
echo 'have the default level of optimizer statistics.'
echo

echo 'If you have used ALTER TABLE to modify the statistics target for'
echo 'any tables, you might want to remove them and restore them after'
echo 'running this script because they will delay fast statistics generation.'
echo

echo 'If you would like default statistics as quickly as possible, cancel'
echo 'this script and run:'
echo '    "/usr/pgsql-11/bin/vacuumdb" --all --analyze-only'
echo

"/usr/pgsql-11/bin/vacuumdb" --all --analyze-in-stages
echo

echo 'Done'

[root@jr-sandbox ~]# su - postgres

-bash-4.1$ ls

11 9.3 analyze_new_cluster.sh delete_old_cluster.sh

-bash-4.1$ cat delete_old_cluster.sh

#!/bin/sh

rm -rf '/data1/data93'

rm -rf '/data1/ts_index1/PG_9.3_201306121'

rm -rf '/data1/ts_data2/PG_9.3_201306121'

-bash-4.1$ cat analyze_new_cluster.sh

#!/bin/sh

echo 'This script will generate minimal optimizer statistics rapidly'

echo 'so your system is usable, and then gather statistics twice more'

echo 'with increasing accuracy. When it is done, your system will'

echo 'have the default level of optimizer statistics.'

echo

echo 'If you have used ALTER TABLE to modify the statistics target for'

echo 'any tables, you might want to remove them and restore them after'

echo 'running this script because they will delay fast statistics generation.'

echo

echo 'If you would like default statistics as quickly as possible, cancel'

echo 'this script and run:'

echo ' "/usr/pgsql-11/bin/vacuumdb" --all --analyze-only'

echo

"/usr/pgsql-11/bin/vacuumdb" --all --analyze-in-stages

echo

echo 'Done'

This looks good; let's execute them to clean up any pg9.3 references, and then remove the pg9.3 rpms.

[root@jr-sandbox data1]# su - postgres
-bash-4.1$ ./delete_old_cluster.sh 
-bash-4.1$ ./analyze_new_cluster.sh 
This script will generate minimal optimizer statistics rapidly
so your system is usable, and then gather statistics twice more
with increasing accuracy.  When it is done, your system will
have the default level of optimizer statistics.

If you have used ALTER TABLE to modify the statistics target for
any tables, you might want to remove them and restore them after
running this script because they will delay fast statistics generation.

If you would like default statistics as quickly as possible, cancel
this script and run:
    "/usr/pgsql-11/bin/vacuumdb" --all --analyze-only

vacuumdb: processing database "postgres": Generating minimal optimizer statistics (1 target)
vacuumdb: processing database "template1": Generating minimal optimizer statistics (1 target)
vacuumdb: processing database "postgres": Generating medium optimizer statistics (10 targets)
vacuumdb: processing database "template1": Generating medium optimizer statistics (10 targets)
vacuumdb: processing database "postgres": Generating default (full) optimizer statistics
vacuumdb: processing database "template1": Generating default (full) optimizer statistics

Done
-bash-4.1$

[root@jr-sandbox data1]# su - postgres

-bash-4.1$ ./delete_old_cluster.sh

-bash-4.1$ ./analyze_new_cluster.sh

This script will generate minimal optimizer statistics rapidly

so your system is usable, and then gather statistics twice more

with increasing accuracy. When it is done, your system will

have the default level of optimizer statistics.

If you have used ALTER TABLE to modify the statistics target for

any tables, you might want to remove them and restore them after

running this script because they will delay fast statistics generation.

If you would like default statistics as quickly as possible, cancel

this script and run:

"/usr/pgsql-11/bin/vacuumdb" --all --analyze-only

vacuumdb: processing database "postgres": Generating minimal optimizer statistics (1 target)

vacuumdb: processing database "template1": Generating minimal optimizer statistics (1 target)

vacuumdb: processing database "postgres": Generating medium optimizer statistics (10 targets)

vacuumdb: processing database "template1": Generating medium optimizer statistics (10 targets)

vacuumdb: processing database "postgres": Generating default (full) optimizer statistics

vacuumdb: processing database "template1": Generating default (full) optimizer statistics

Done

-bash-4.1$

Remove the pg9.3 rpms and references, set the new data location in the .pgsql_profile.

[root@jr-sandbox ~]# yum remove postgresql93*
Loaded plugins: fastestmirror
Setting up Remove Process
Resolving Dependencies
--> Running transaction check
---> Package postgresql93.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
---> Package postgresql93-contrib.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
---> Package postgresql93-debuginfo.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
---> Package postgresql93-devel.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
---> Package postgresql93-docs.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
---> Package postgresql93-libs.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
---> Package postgresql93-server.x86_64 0:9.3.24-1PGDG.rhel6 will be erased
--> Finished Dependency Resolution

Dependencies Resolved

===================================================================================================================================================================================================================
 Package                                                  Arch                                     Version                                              Repository                                            Size
===================================================================================================================================================================================================================
Removing:
 postgresql93                                             x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                   5.3 M
 postgresql93-contrib                                     x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                   1.7 M
 postgresql93-debuginfo                                   x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                    28 M
 postgresql93-devel                                       x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                   6.8 M
 postgresql93-docs                                        x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                    31 M
 postgresql93-libs                                        x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                   632 k
 postgresql93-server                                      x86_64                                   9.3.24-1PGDG.rhel6                                   @affinity6-prod-db                                    16 M

Transaction Summary
===================================================================================================================================================================================================================
Remove        7 Package(s)

Installed size: 89 M
Is this ok [y/N]: y
Downloading Packages:
Running rpm_check_debug
Running Transaction Test
Transaction Test Succeeded
Running Transaction
Warning: RPMDB altered outside of yum.
  Erasing    : postgresql93-debuginfo-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                1/7 
  Erasing    : postgresql93-devel-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                    2/7 
  Erasing    : postgresql93-server-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                   3/7 
  Erasing    : postgresql93-contrib-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                  4/7 
  Erasing    : postgresql93-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                          5/7 
  Erasing    : postgresql93-libs-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                     6/7 
  Erasing    : postgresql93-docs-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                     7/7 
  Verifying  : postgresql93-debuginfo-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                1/7 
  Verifying  : postgresql93-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                          2/7 
  Verifying  : postgresql93-docs-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                     3/7 
  Verifying  : postgresql93-contrib-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                  4/7 
  Verifying  : postgresql93-server-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                   5/7 
  Verifying  : postgresql93-devel-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                    6/7 
  Verifying  : postgresql93-libs-9.3.24-1PGDG.rhel6.x86_64                                                                                                                                                     7/7 

Removed:
  postgresql93.x86_64 0:9.3.24-1PGDG.rhel6          postgresql93-contrib.x86_64 0:9.3.24-1PGDG.rhel6     postgresql93-debuginfo.x86_64 0:9.3.24-1PGDG.rhel6     postgresql93-devel.x86_64 0:9.3.24-1PGDG.rhel6    
  postgresql93-docs.x86_64 0:9.3.24-1PGDG.rhel6     postgresql93-libs.x86_64 0:9.3.24-1PGDG.rhel6        postgresql93-server.x86_64 0:9.3.24-1PGDG.rhel6       

Complete!
[root@jr-sandbox ~]#

[root@jr-sandbox ~]# yum remove postgresql93*

Loaded plugins: fastestmirror

Setting up Remove Process

Resolving Dependencies

--> Running transaction check

---> Package postgresql93.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

---> Package postgresql93-contrib.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

---> Package postgresql93-debuginfo.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

---> Package postgresql93-devel.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

---> Package postgresql93-docs.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

---> Package postgresql93-libs.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

---> Package postgresql93-server.x86_64 0:9.3.24-1PGDG.rhel6 will be erased

--> Finished Dependency Resolution

Dependencies Resolved

===================================================================================================================================================================================================================

Package Arch Version Repository Size

Removing:

postgresql93 x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 5.3 M

postgresql93-contrib x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 1.7 M

postgresql93-debuginfo x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 28 M

postgresql93-devel x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 6.8 M

postgresql93-docs x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 31 M

postgresql93-libs x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 632 k

postgresql93-server x86_64 9.3.24-1PGDG.rhel6 @affinity6-prod-db 16 M

Transaction Summary

Remove 7 Package(s)

Installed size: 89 M

Is this ok [y/N]: y

Downloading Packages:

Running rpm_check_debug

Running Transaction Test

Transaction Test Succeeded

Running Transaction

Warning: RPMDB altered outside of yum.

Erasing : postgresql93-debuginfo-9.3.24-1PGDG.rhel6.x86_64 1/7

Erasing : postgresql93-devel-9.3.24-1PGDG.rhel6.x86_64 2/7

Erasing : postgresql93-server-9.3.24-1PGDG.rhel6.x86_64 3/7

Erasing : postgresql93-contrib-9.3.24-1PGDG.rhel6.x86_64 4/7

Erasing : postgresql93-9.3.24-1PGDG.rhel6.x86_64 5/7

Erasing : postgresql93-libs-9.3.24-1PGDG.rhel6.x86_64 6/7

Erasing : postgresql93-docs-9.3.24-1PGDG.rhel6.x86_64 7/7

Verifying : postgresql93-debuginfo-9.3.24-1PGDG.rhel6.x86_64 1/7

Verifying : postgresql93-9.3.24-1PGDG.rhel6.x86_64 2/7

Verifying : postgresql93-docs-9.3.24-1PGDG.rhel6.x86_64 3/7

Verifying : postgresql93-contrib-9.3.24-1PGDG.rhel6.x86_64 4/7

Verifying : postgresql93-server-9.3.24-1PGDG.rhel6.x86_64 5/7

Verifying : postgresql93-devel-9.3.24-1PGDG.rhel6.x86_64 6/7

Verifying : postgresql93-libs-9.3.24-1PGDG.rhel6.x86_64 7/7

Removed:

postgresql93.x86_64 0:9.3.24-1PGDG.rhel6 postgresql93-contrib.x86_64 0:9.3.24-1PGDG.rhel6 postgresql93-debuginfo.x86_64 0:9.3.24-1PGDG.rhel6 postgresql93-devel.x86_64 0:9.3.24-1PGDG.rhel6

postgresql93-docs.x86_64 0:9.3.24-1PGDG.rhel6 postgresql93-libs.x86_64 0:9.3.24-1PGDG.rhel6 postgresql93-server.x86_64 0:9.3.24-1PGDG.rhel6

Complete!

[root@jr-sandbox ~]#

[root@jr-sandbox ~]# cd /etc/sysconfig/pgsql/
You have new mail in /var/spool/mail/root
[root@jr-sandbox pgsql]# ls
postgresql-11  postgresql-9.3
[root@jr-sandbox pgsql]# rm -f postgresql-9.3 
[root@jr-sandbox pgsql]# su - postgres
-bash-4.1$ ls
11  9.3  analyze_new_cluster.sh  delete_old_cluster.sh
-bash-4.1$ rm -rf 9.3

[root@jr-sandbox ~]# cd /etc/sysconfig/pgsql/

You have new mail in /var/spool/mail/root

[root@jr-sandbox pgsql]# ls

postgresql-11 postgresql-9.3

[root@jr-sandbox pgsql]# rm -f postgresql-9.3

[root@jr-sandbox pgsql]# su - postgres

-bash-4.1$ ls

11 9.3 analyze_new_cluster.sh delete_old_cluster.sh

-bash-4.1$ rm -rf 9.3

You can now review the pg_hba.conf and postgresql.conf you saved in /root and add what's needed to the new pg11 configs.

That’s it!!

SINOPIA NPM allow connections to GITHUB as well as the NPM registry

October 11, 2018October 11, 2018 adminLeave a comment

SINOPIA LINK HERE
We use SINOPIA as a proxy on our internal network behind the firewall to allow users to install NODE packages without an internet connection. We basically run sinopia on a machine that has access to the internet and the clients point to the server to install packages that are not locally available. We have been running into issues where installs that needed access to github would fail with something like this:

[15:29:07] user1@sb-user1:~/app/mc_api/lib/reports $ npm install --save slack/client --loglevel verbose
npm info it worked if it ends with ok
npm verb cli [ '/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/node',
npm verb cli   '/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/npm',
npm verb cli   'install',
npm verb cli   '--save',
npm verb cli   'slack/client',
npm verb cli   '--loglevel',
npm verb cli   'verbose' ]
npm info using npm@6.4.1
npm info using node@v8.9.4
npm verb npm-session f40f30f7bf0339f9
npm timing stage:rollbackFailedOptional Completed in 1ms
npm timing stage:runTopLevelLifecycles Completed in 1114ms
npm verb stack Error: exited with error code: 128
npm verb stack     at ChildProcess.<anonymous> (/home/user1/node_local_install/.nvm/versions/node/v8.9.4/lib/node_modules/npm/node_modules/pacote/lib/util/finished.js:12:19)
npm verb stack     at emitTwo (events.js:126:13)
npm verb stack     at ChildProcess.emit (events.js:214:7)
npm verb stack     at maybeClose (internal/child_process.js:925:16)
npm verb stack     at Socket.stream.socket.on (internal/child_process.js:346:11)
npm verb stack     at emitOne (events.js:116:13)
npm verb stack     at Socket.emit (events.js:211:7)
npm verb stack     at Pipe._handle.close [as _onclose] (net.js:554:12)
npm verb cwd /home/user1/app/mc_api/lib/reports
npm verb Linux 2.6.32-754.3.5.el6.x86_64
npm verb argv "/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/node" "/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/npm" "install" "--save" "slack/client" "--loglevel" "verbose"
npm verb node v8.9.4
npm verb npm  v6.4.1
npm ERR! Error while executing:
npm ERR! /usr/bin/git ls-remote -h -t ssh://git@github.com/slack/client.git
npm ERR!
npm ERR! ssh: connect to host github.com port 22: Connection refused
npm ERR! fatal: Could not read from remote repository.
npm ERR!
npm ERR! Please make sure you have the correct access rights
npm ERR! and the repository exists.
npm ERR!
npm ERR! exited with error code: 128
npm verb exit [ 1, true ]
npm timing npm Completed in 1497ms

npm ERR! A complete log of this run can be found in:
npm ERR!     /home/user1/.npm/_logs/2018-10-10T19_34_06_306Z-debug.log

[15:29:07] user1@sb-user1:~/app/mc_api/lib/reports $ npm install --save slack/client --loglevel verbose

npm info it worked if it ends with ok

npm verb cli [ '/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/node',

npm verb cli '/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/npm',

npm verb cli 'install',

npm verb cli '--save',

npm verb cli 'slack/client',

npm verb cli '--loglevel',

npm verb cli 'verbose' ]

npm info using npm@6.4.1

npm info using node@v8.9.4

npm verb npm-session f40f30f7bf0339f9

npm timing stage:rollbackFailedOptional Completed in 1ms

npm timing stage:runTopLevelLifecycles Completed in 1114ms

npm verb stack Error: exited with error code: 128

npm verb stack at ChildProcess.<anonymous> (/home/user1/node_local_install/.nvm/versions/node/v8.9.4/lib/node_modules/npm/node_modules/pacote/lib/util/finished.js:12:19)

npm verb stack at emitTwo (events.js:126:13)

npm verb stack at ChildProcess.emit (events.js:214:7)

npm verb stack at maybeClose (internal/child_process.js:925:16)

npm verb stack at Socket.stream.socket.on (internal/child_process.js:346:11)

npm verb stack at emitOne (events.js:116:13)

npm verb stack at Socket.emit (events.js:211:7)

npm verb stack at Pipe._handle.close [as _onclose] (net.js:554:12)

npm verb cwd /home/user1/app/mc_api/lib/reports

npm verb Linux 2.6.32-754.3.5.el6.x86_64

npm verb argv "/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/node" "/home/user1/node_local_install/.nvm/versions/node/v8.9.4/bin/npm" "install" "--save" "slack/client" "--loglevel" "verbose"

npm verb node v8.9.4

npm verb npm v6.4.1

npm ERR! Error while executing:

npm ERR! /usr/bin/git ls-remote -h -t ssh://git@github.com/slack/client.git

npm ERR!

npm ERR! ssh: connect to host github.com port 22: Connection refused

npm ERR! fatal: Could not read from remote repository.

npm ERR!

npm ERR! Please make sure you have the correct access rights

npm ERR! and the repository exists.

npm ERR!

npm ERR! exited with error code: 128

npm verb exit [ 1, true ]

npm timing npm Completed in 1497ms

npm ERR! A complete log of this run can be found in:

npm ERR! /home/user1/.npm/_logs/2018-10-10T19_34_06_306Z-debug.log

As you can see, we are getting choked at:

npm ERR! ssh: connect to host github.com port 22: Connection refused
npm ERR! fatal: Could not read from remote repository.

1 2	npm ERR! ssh: connect to host github.com port 22: Connection refused npm ERR! fatal: Could not read from remote repository.

To get around this we need to change the config.yml on the server to allow proxies to github, here is the final configuration. Hope this helps other users as we had a fun time trying to figure it out. Pay attention to the uplinks section and the proxy requests where github is defined.

#
# This is the default config file. It allows all users to do anything,
# so don't use it on production systems.
#
# Look here for more config file examples:
# https://github.com/rlidwka/sinopia/tree/master/conf
#

# path to a directory with all packages
storage: ./storage

auth:
  htpasswd:
    file: ./htpasswd
    # Maximum amount of users allowed to register, defaults to "+inf".
    # You can set this to -1 to disable registration.
    #max_users: 1000

# a list of other known repositories we can talk to
uplinks:
  npmjs:
    url: https://registry.npmjs.org/
  github:
    url: https://github.com/

packages:
  '@*/*':
    # scoped packages
    access: $all
    publish: $authenticated
    proxy:
      - npmjs
      - github


  '*':
    # allow all users (including non-authenticated users) to read and
    # publish all packages
    #
    # you can specify usernames/groupnames (depending on your auth plugin)
    # and three keywords: "$all", "$anonymous", "$authenticated"
    access: $all

    # allow all known users to publish packages
    # (anyone can register by default, remember?)
    publish: $authenticated

    # if package is not available locally, proxy requests to the 'npmjs' and 'github' uplinks
    proxy:
      - npmjs
      - github
# log settings
logs:
  #- {type: stdout, format: pretty, level: http}
  - {type: file, path: sinopia.log, level: debug}

#Bind Address
listen:
  - 0.0.0.0:4873
#

# This is the default config file. It allows all users to do anything,

# so don't use it on production systems.

# Look here for more config file examples:

# https://github.com/rlidwka/sinopia/tree/master/conf

# path to a directory with all packages

storage: ./storage

auth:

htpasswd:

file: ./htpasswd

# Maximum amount of users allowed to register, defaults to "+inf".

# You can set this to -1 to disable registration.

#max_users: 1000

# a list of other known repositories we can talk to

uplinks:

npmjs:

url: https://registry.npmjs.org/

github:

url: https://github.com/

packages:

'@*/*':

# scoped packages

access: $all

publish: $authenticated

proxy:

- npmjs

- github

'*':

# allow all users (including non-authenticated users) to read and

# publish all packages

# you can specify usernames/groupnames (depending on your auth plugin)

# and three keywords: "$all", "$anonymous", "$authenticated"

access: $all

# allow all known users to publish packages

# (anyone can register by default, remember?)

publish: $authenticated

# if package is not available locally, proxy requests to 'npmjs' registry

proxy:

- npmjs

- github

# log settings

logs:

#- {type: stdout, format: pretty, level: http}

- {type: file, path: sinopia.log, level: debug}

#Bind Address

listen:

- 0.0.0.0:4873

PSQL Connect To AWS Redshift From Windows 10 PowerShell

March 16, 2018December 20, 2019 admin2 Comments

Coming from a completely Linux background, I was tasked with connecting to an AWS Redshift cluster or a Postgres cluster from Windows PowerShell using psql. I knew it was possible, but searching the internet only turned up CMD prompt solutions, and when I attempted it via PowerShell I was faced with the error below. You will need to install Postgres on Windows 10 to get access to the psql binary; you can get it here:
https://www.postgresql.org/download/windows/

PS C:\WINDOWS\system32> psql.exe -h afs-rs-dev02.us-east-1.redshift.amazonaws.com  -p 5439 -U awsmaster benchmark01
Password for user awsmaster:
psql: FATAL:  invalid value for parameter "client_encoding": "WIN1252"

PS C:\WINDOWS\system32> psql.exe -h afs-rs-dev02.us-east-1.redshift.amazonaws.com -p 5439 -U awsmaster benchmark01

Password for user awsmaster:

psql: FATAL: invalid value for parameter "client_encoding": "WIN1252"

It turns out, as a colleague and I figured out, that you need to set the PGCLIENTENCODING environment variable on the PowerShell command line. This was expected, but we could not nail down the syntax at first; here is what worked.

PS C:\WINDOWS\system32> $env:PGCLIENTENCODING='utf-8';
PS C:\WINDOWS\system32> psql.exe -h afs-rs-dev02.us-east-1.redshift.amazonaws.com  -p 5439 -U awsmaster benchmark01
Password for user awsmaster:
psql (10.1, server 8.0.2)
WARNING: Console code page (437) differs from Windows code page (1252)
         8-bit characters might not work correctly. See psql reference
         page "Notes for Windows users" for details.
SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off)
Type "help" for help.

benchmark01=#

PS C:\WINDOWS\system32> $env:PGCLIENTENCODING='utf-8';

PS C:\WINDOWS\system32> psql.exe -h afs-rs-dev02.us-east-1.redshift.amazonaws.com -p 5439 -U awsmaster benchmark01

Password for user awsmaster:

psql (10.1, server 8.0.2)

WARNING: Console code page (437) differs from Windows code page (1252)

8-bit characters might not work correctly. See psql reference

page "Notes for Windows users" for details.

SSL connection (protocol: TLSv1.2, cipher: ECDHE-RSA-AES256-GCM-SHA384, bits: 256, compression: off)

Type "help" for help.

benchmark01=#

Once this is set, you can connect to PG as normal.

Python Generator Find Files With Wildcard

February 13, 2018February 13, 2018 adminLeave a comment

This is a neat way to generate the file names in a directory that match a specific pattern. I use this to generate a list of files exported out of Hive to load into S3.

def find_files(directory, pattern):
    """Walk *directory* recursively and yield matching file paths.

    For each directory visited by ``os.walk``, the file names are taken in
    sorted order and those whose base name matches the shell-style
    *pattern* (``fnmatch`` rules) are yielded, joined to that directory's
    path.
    """
    for current_dir, _subdirs, names in os.walk(directory):
        # fnmatch.filter keeps the (sorted) input order of the names.
        for matched in fnmatch.filter(sorted(names), pattern):
            yield os.path.join(current_dir, matched)

def find_files(directory, pattern):
    """Yield the path of every file under *directory* whose name matches *pattern*.

    Walks the tree with ``os.walk``; in each directory the file names are
    visited in sorted order and compared against the shell-style wildcard
    *pattern* (for example ``'*.gz'``).
    """
    for root, dirs, files in os.walk(directory):
        for basename in sorted(files):
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                yield filename

local_dir = '/mnt/share/etl/date/'
# Upload every gzip file found under local_dir and log the outcome of each.
for files in find_files(local_dir,'*.gz'):
    # The path without its leading '/' is passed as the first upload argument
    # (presumably the S3 object key -- confirm against awss3.upload).
    key = files[1:]
    try:
        awss3.upload(key,files)
        log_msg = ('uploading file: [{0}] to S3').format(files)
        log.write(log_msg)
    except Exception as e:
        # {0} is the error and {1} the file.  Using {0} for both printed the
        # error text where the file name belongs.
        log_msg = ('ERROR: {0} uploading file: [{1}] to S3').format(e,files)
        log.write(log_msg, 'error')

local_dir = '/mnt/share/etl/date/'
# Upload every gzip file found under local_dir and log the outcome of each.
for files in find_files(local_dir,'*.gz'):
    # The path without its leading '/' is passed as the first upload argument
    # (presumably the S3 object key -- confirm against awss3.upload).
    key = files[1:]
    try:
        awss3.upload(key,files)
        log_msg = ('uploading file: [{0}] to S3').format(files)
        log.write(log_msg)
    except Exception as e:
        # {0} is the error and {1} the file.  Using {0} for both printed the
        # error text where the file name belongs.
        log_msg = ('ERROR: {0} uploading file: [{1}] to S3').format(e,files)
        log.write(log_msg, 'error')

Python3 Subprocess and Rsync Deadlock Strace Timeout

February 11, 2018October 9, 2019 admin1 Comment

I recently came across a tough-to-debug issue: I was calling a shell script from Python using the subprocess module, and that shell script called rsync. No matter what I tried, the call always ended in a timeout. I fired up strace and saw that the process was sitting in a select() call that kept timing out.

select(4, NULL, [3], [3], {60, 0}) = 0 (Timeout)

I looked at the subprocess documentation and apparently using pipes will fill the system pipe buffer.

Warning

This will deadlock when using stdout=PIPE and/or stderr=PIPE and the child process generates enough output to a pipe such that it blocks waiting for the OS pipe buffer to accept more data. Use communicate() to avoid that.

I was baffled, I finally took the approach to eliminate stderr and stdout and just check the return status of the command using run(). Here is what I finally came up with, and all was well.

stdbuf -oL -e0 /usr/local/bin/rsync --outbuf=N -avz

1	stdbuf -oL -e0 /usr/local/bin/rsync --outbuf=N -avz

def execute_jobs(cmd):
    """Run *cmd* as a child process and log how it went.

    The command line is tokenised with ``shlex.split`` and executed without
    a shell; stdout and stderr are not captured.  A non-zero exit status is
    logged at CRITICAL level and is not re-raised.
    """
    try:
        logger.info('Start Command: [%s]' % cmd)
        argv = shlex.split(cmd)
        subprocess.run(argv, check=True)
    except subprocess.CalledProcessError as err:
        logger.critical('[%s] FATAL: Command failed with error [%s]' % (cmd, err))
        return
    logger.info('Command Success: [%s]' % cmd)

def execute_jobs(cmd):
    """Run *cmd* as a child process and log the outcome.

    The command line is split with ``shlex.split`` and run without a shell.
    A non-zero exit status is logged at CRITICAL level and is not re-raised.
    """
    try:
        logger.info('Start Command: [%s]' % cmd)
        subprocess.run(shlex.split(cmd), check=True)
        logger.info('Command Success: [%s]' % cmd)
    except subprocess.CalledProcessError as e:
        logger.critical('[%s] FATAL: Command failed with error [%s]' % (cmd,e))

Hope you find this and it helps you.

Amazon Redshift Long Running Query Alert to Slack

September 16, 2017January 31, 2020 adminLeave a comment

When run as a user that can query the STV_RECENTS table, this Python script compares the duration of each currently running query against the threshold set in the config file (in microseconds) and sends an alert to Slack for any query that exceeds it. The sample INI file below sets the threshold to 3600000000 microseconds, i.e. one hour. I have it scheduled in cron to run every 30 minutes.

CLI example:

[Prompt#]>python rs_long_running_query_alert.py

1	[Prompt#]>python rs_long_running_query_alert.py

You will need slackclient:
https://pypi.python.org/pypi/slackclient
You will need psycopg2:
https://pypi.python.org/pypi/psycopg2

#!/usr/bin/python2.6
__author__ = "Jason Ralph"

from datetime import timedelta
import os
import psycopg2
import ConfigParser
from slackclient import SlackClient
# Slack client used for alerting.  The token comes from the environment, so a
# missing SLACK_API_TOKEN raises KeyError as soon as the module is loaded.
slack_token = os.environ["SLACK_API_TOKEN"]
sc = SlackClient(slack_token)


# Settings are read from qa.ini in the directory named by the QA_INI_PATH
# environment variable.
config = ConfigParser.ConfigParser()
config.read(('%s/qa.ini') % (os.environ['QA_INI_PATH']))
# NOTE(review): the result of sections() is discarded, so this call appears
# to have no effect.
config.sections()

# redshift
rs_host = config.get('redshift', 'hostname')
rs_port = config.get('redshift', 'port')
rs_database = config.get('redshift', 'database')
rs_sys_user = config.get('redshift', 'sys_user')
rs_sys_pass = config.get('redshift', 'sys_password')
# Long-query threshold in microseconds.  Kept as read from the INI file;
# get_long_running_queries() converts it with int() before use.
rs_app_lq_thresh = config.get('redshift', 'lq_app_threshold')

"""
Get running queries over set threshold set in config in microseconds.
"""


def get_long_running_queries():
    """Post a Slack alert for each long-running application query on Redshift.

    Reads ``stv_recents`` for queries that are still running and whose
    duration (microseconds) is at least the ``lq_app_threshold`` value from
    the INI file.  One ``chat.postMessage`` call is made for every such query
    whose user is one of the application users listed below.

    Returns:
        None.  If the connection cannot be opened the error is printed and
        nothing is checked.

    Raises:
        ValueError: if ``lq_app_threshold`` is not an integer.
        psycopg2.Error: if running the query fails.
    """
    # The script's top-level imports only load ``psycopg2`` itself; import the
    # submodule explicitly so ``psycopg2.extras.DictCursor`` resolves.
    import psycopg2.extras

    app_users = ('user_1', 'user_2', 'user_3')
    # Convert before connecting so a bad config value cannot leak a connection.
    cast_thresh = int(rs_app_lq_thresh)

    try:
        conn = psycopg2.connect(("dbname=%s user=%s host=%s password=%s port=%s") %
                                (rs_database, rs_sys_user, rs_host, rs_sys_pass, rs_port))
    except psycopg2.Error as e:
        # Without a connection there is nothing to check; carrying on would
        # only fail later with a NameError on ``conn``.
        print(e)
        return

    # The threshold is passed as a bound parameter instead of being formatted
    # into the SQL text.
    sql = ("select userid, status, starttime, duration, user_name, "
           "db_name, query, pid from stv_recents "
           "where duration >= %s and status='Running';")
    count = 0
    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cursor.execute(sql, (cast_thresh,))
        for row in cursor:
            user_name = row['user_name']
            # NOTE(review): the sample Slack output shows user_name padded
            # with trailing spaces, so compare the stripped value.  The
            # duration is already filtered by the query, so only the user
            # needs checking here.
            if (user_name or '').strip() not in app_users:
                continue

            count += 1
            human_date_string = timedelta(microseconds=row['duration'])
            msg = ''.join([
                '%s: App Limit Breached - Long Running Query Alert\n' % rs_database,
                'PID: %s\n' % row['pid'],
                'COUNT: %s\n' % count,
                'USERID: %s\n' % row['userid'],
                'STATUS: %s\n' % row['status'],
                'STARTTIME: %s\n' % row['starttime'],
                'DURATION: %s\n' % human_date_string,
                'USER_NAME: %s\n' % user_name,
                'DB_NAME: %s\n' % row['db_name'],
                'QUERY: %s' % row['query'],
            ])
            # Format the text before the call; applying ``% msg`` to the
            # return value of api_call sent the literal template instead.
            sc.api_call('chat.postMessage', channel='#python',
                        text="'%s' :tada:" % msg)
    finally:
        # Close the connection even when the query or a Slack call fails.
        conn.close()


def main():
    """Script entry point: run the long-running-query check once."""
    get_long_running_queries()


# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

#!/usr/bin/python2.6

__author__ = "Jason Ralph"

from datetime import timedelta

import os

import psycopg2

import ConfigParser

from slackclient import SlackClient

# Slack client used for alerting.  The token comes from the environment, so a
# missing SLACK_API_TOKEN raises KeyError as soon as the module is loaded.
slack_token = os.environ["SLACK_API_TOKEN"]

sc = SlackClient(slack_token)

# Settings are read from qa.ini in the directory named by the QA_INI_PATH
# environment variable.
config = ConfigParser.ConfigParser()

config.read(('%s/qa.ini') % (os.environ['QA_INI_PATH']))

# NOTE(review): the result of sections() is discarded, so this call appears
# to have no effect.
config.sections()

# redshift

rs_host = config.get('redshift', 'hostname')

rs_port = config.get('redshift', 'port')

rs_database = config.get('redshift', 'database')

rs_sys_user = config.get('redshift', 'sys_user')

rs_sys_pass = config.get('redshift', 'sys_password')

# Long-query threshold in microseconds.  Kept as read from the INI file;
# get_long_running_queries() converts it with int() before use.
rs_app_lq_thresh = config.get('redshift', 'lq_app_threshold')

"""

Get running queries over set threshold set in config in microseconds.

"""

def get_long_running_queries():
    """Post a Slack alert for each long-running application query on Redshift.

    Reads ``stv_recents`` for queries that are still running and whose
    duration (microseconds) is at least the ``lq_app_threshold`` value from
    the INI file.  One ``chat.postMessage`` call is made for every such query
    whose user is one of the application users listed below.

    Returns:
        None.  If the connection cannot be opened the error is printed and
        nothing is checked.

    Raises:
        ValueError: if ``lq_app_threshold`` is not an integer.
        psycopg2.Error: if running the query fails.
    """
    # The script's top-level imports only load ``psycopg2`` itself; import the
    # submodule explicitly so ``psycopg2.extras.DictCursor`` resolves.
    import psycopg2.extras

    app_users = ('user_1', 'user_2', 'user_3')
    # Convert before connecting so a bad config value cannot leak a connection.
    cast_thresh = int(rs_app_lq_thresh)

    try:
        conn = psycopg2.connect(("dbname=%s user=%s host=%s password=%s port=%s") %
                                (rs_database, rs_sys_user, rs_host, rs_sys_pass, rs_port))
    except psycopg2.Error as e:
        # Without a connection there is nothing to check; carrying on would
        # only fail later with a NameError on ``conn``.
        print(e)
        return

    # The threshold is passed as a bound parameter instead of being formatted
    # into the SQL text.
    sql = ("select userid, status, starttime, duration, user_name, "
           "db_name, query, pid from stv_recents "
           "where duration >= %s and status='Running';")
    count = 0
    try:
        cursor = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        cursor.execute(sql, (cast_thresh,))
        for row in cursor:
            user_name = row['user_name']
            # NOTE(review): the sample Slack output shows user_name padded
            # with trailing spaces, so compare the stripped value.  The
            # duration is already filtered by the query, so only the user
            # needs checking here.
            if (user_name or '').strip() not in app_users:
                continue

            count += 1
            human_date_string = timedelta(microseconds=row['duration'])
            msg = ''.join([
                '%s: App Limit Breached - Long Running Query Alert\n' % rs_database,
                'PID: %s\n' % row['pid'],
                'COUNT: %s\n' % count,
                'USERID: %s\n' % row['userid'],
                'STATUS: %s\n' % row['status'],
                'STARTTIME: %s\n' % row['starttime'],
                'DURATION: %s\n' % human_date_string,
                'USER_NAME: %s\n' % user_name,
                'DB_NAME: %s\n' % row['db_name'],
                'QUERY: %s' % row['query'],
            ])
            # Format the text before the call; applying ``% msg`` to the
            # return value of api_call sent the literal template instead.
            sc.api_call('chat.postMessage', channel='#python',
                        text="'%s' :tada:" % msg)
    finally:
        # Close the connection even when the query or a Slack call fails.
        conn.close()

def main():
    """Script entry point: run the long-running-query check once."""
    get_long_running_queries()


# Run the check only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

INI file:

[redshift]
hostname = redshift.amazonaws.com
port = 5439
database = redshift
user = user
password = password
schema = public
sys_user = rs_sys
sys_password = password
lq_app_threshold = 3600000000

[redshift]

hostname = redshift.amazonaws.com

port = 5439

database = redshift

user = user

password = password

schema = public

sys_user = rs_sys

sys_password = password

lq_app_threshold = 3600000000

Slack message example:

'db-redshift' APP [10:15 AM] 
redshift_dev: Long Running Query Alert
PID: 10723
COUNT: 1
USERID: 100
STATUS: Running             
STARTTIME: 2017-09-16 11:00:01.417040
DURATION: 3:15:00.735017
USER_NAME: admin                                             
DB_NAME: database                                      
QUERY: vacuum;

'db-redshift' APP [10:15 AM]

redshift_dev: Long Running Query Alert

PID: 10723

COUNT: 1

USERID: 100

STATUS: Running

STARTTIME: 2017-09-16 11:00:01.417040

DURATION: 3:15:00.735017

USER_NAME: admin

DB_NAME: database

QUERY: vacuum;

Nagios Python Plugin Check If File Is Stale

July 27, 2017 adminLeave a comment

Wrote this simple plugin to check if a log file was stale on a server using nagios and nrpe. This plugin checks multiple files with the app. naming convention.

#!/usr/bin/env python

__author__ = "Jason Ralph"

""" IMPORTS """
import datetime
import time
import re
import sys
import os
import optparse
import os.path
import glob

""" NAGIOS API RETURN CODES """
nag_ret_dict = {'NagOk': 0,
                'NagWarn': 1,
                'NagCrit': 2,
                'NagUnkown': 3}


def main(pattern='/var/log/httpd/app.*', max_age=datetime.timedelta(days=1)):
    """Exit with a Nagios status describing how stale the matching files are.

    Every file matching *pattern* is examined.  The previous version exited
    inside the loop and therefore only ever looked at the first match.

    Args:
        pattern: glob pattern selecting the files to check.
        max_age: ``datetime.timedelta``; a file whose last modification is
            older than this is reported as stale.

    Exits (via ``sys.exit``) with:
        NagOk when no file matches yet, or when no matching file is stale;
        NagCrit when at least one matching file is older than *max_age*.
    """
    now = datetime.datetime.now()
    ages = []
    for path in glob.glob(pattern):
        try:
            then = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        except OSError:
            # The file vanished between glob() and the stat call; skip it.
            continue
        ages.append((now - then, then, path))

    if not ages:
        print("OK: Waiting for file")
        sys.exit(nag_ret_dict['NagOk'])

    # Oldest first, so the worst offender leads the status line.
    stale = sorted((entry for entry in ages if entry[0] > max_age),
                   reverse=True)
    if stale:
        # Format before printing: ``print(...) % args`` fails on Python 3.
        print("; ".join("CRIT: %s: Is greater then %s" % (path, tdelta)
                        for tdelta, _then, path in stale))
        sys.exit(nag_ret_dict['NagCrit'])

    # Nothing is stale: report the oldest file as the representative sample.
    tdelta, then, _path = max(ages)
    print("OK: NOW: %s - THEN: %s - DELTA: %s" % (now, then, tdelta))
    sys.exit(nag_ret_dict['NagOk'])

if __name__ == "__main__":
    main()

#!/usr/bin/env python

__author__ = "Jason Ralph"

""" IMPORTS """

import datetime

import time

import re

import sys

import os

import optparse

import os.path

import glob

""" NAGIOS API RETURN CODES """

nag_ret_dict = {'NagOk': 0,
                'NagWarn': 1,
                'NagCrit': 2,
                'NagUnkown': 3}


def main(pattern='/var/log/httpd/app.*', max_age=datetime.timedelta(days=1)):
    """Exit with a Nagios status describing how stale the matching files are.

    Every file matching *pattern* is examined.  The previous version exited
    inside the loop and therefore only ever looked at the first match.

    Args:
        pattern: glob pattern selecting the files to check.
        max_age: ``datetime.timedelta``; a file whose last modification is
            older than this is reported as stale.

    Exits (via ``sys.exit``) with:
        NagOk when no file matches yet, or when no matching file is stale;
        NagCrit when at least one matching file is older than *max_age*.
    """
    now = datetime.datetime.now()
    ages = []
    for path in glob.glob(pattern):
        try:
            then = datetime.datetime.fromtimestamp(os.path.getmtime(path))
        except OSError:
            # The file vanished between glob() and the stat call; skip it.
            continue
        ages.append((now - then, then, path))

    if not ages:
        print("OK: Waiting for file")
        sys.exit(nag_ret_dict['NagOk'])

    # Oldest first, so the worst offender leads the status line.
    stale = sorted((entry for entry in ages if entry[0] > max_age),
                   reverse=True)
    if stale:
        # Format before printing: ``print(...) % args`` fails on Python 3.
        print("; ".join("CRIT: %s: Is greater then %s" % (path, tdelta)
                        for tdelta, _then, path in stale))
        sys.exit(nag_ret_dict['NagCrit'])

    # Nothing is stale: report the oldest file as the representative sample.
    tdelta, then, _path = max(ages)
    print("OK: NOW: %s - THEN: %s - DELTA: %s" % (now, then, tdelta))
    sys.exit(nag_ret_dict['NagOk'])


if __name__ == "__main__":
    main()

POSTGRES – Top 100 Tables In Tablespace

May 30, 2017May 30, 2017 adminLeave a comment

I had a situation where I needed to find the top 100 largest tables in a certain tablespace on a Postgres 9 database; in my case we archive tables into an archive1 tablespace. This query finds the largest relations in the archive1 tablespace. It's important to swap out ‘archive1’ with whatever tablespace you are trying to list.

-- Top 100 largest tables and indexes stored in one tablespace.
-- Replace 'archive1' with the tablespace you want to list.
SELECT N.nspname || '.' || C.relname AS "relation",
    -- pg_class.reltype is zero for indexes, which have no row type.
    CASE WHEN C.reltype = 0
        THEN pg_size_pretty(pg_total_relation_size(C.oid)) || ' (index)'
        ELSE pg_size_pretty(pg_total_relation_size(C.oid)) || ' (' ||  pg_size_pretty(pg_relation_size(C.oid)) || ' data)'
    END AS "size (data)",
    COALESCE(T.tablespace, I.tablespace, '') AS "tablespace"
FROM pg_class C
LEFT JOIN pg_namespace N ON  (N.oid = C.relnamespace)
-- Match on schema as well as name: relation names are only unique within a
-- schema, so a name-only join duplicates rows and can report the tablespace
-- of a same-named relation in another schema.
LEFT JOIN pg_tables T ON (T.schemaname = N.nspname AND T.tablename = C.relname)
LEFT JOIN pg_indexes I ON (I.schemaname = N.nspname AND I.indexname = C.relname)
WHERE N.nspname NOT IN ('pg_catalog','pg_toast','information_schema')
AND COALESCE(T.tablespace, I.tablespace, '') = 'archive1'
ORDER BY pg_total_relation_size(C.oid) DESC LIMIT 100;
                          relation                          |         size (data)          | tablespace
------------------------------------------------------------+------------------------------+------------
 public.table1                                              | 172 GB (148 GB data)         | archive1
 public.table2                                              | 171 GB (147 GB data)         | archive1
 public.table3                                              | 32 GB (32 GB data)           | archive1

--SNIP--

-- Top 100 largest tables and indexes stored in one tablespace.
-- Replace 'archive1' with the tablespace you want to list.
SELECT N.nspname || '.' || C.relname AS "relation",
    -- pg_class.reltype is zero for indexes, which have no row type.
    CASE WHEN C.reltype = 0
        THEN pg_size_pretty(pg_total_relation_size(C.oid)) || ' (index)'
        ELSE pg_size_pretty(pg_total_relation_size(C.oid)) || ' (' || pg_size_pretty(pg_relation_size(C.oid)) || ' data)'
    END AS "size (data)",
    COALESCE(T.tablespace, I.tablespace, '') AS "tablespace"
FROM pg_class C
LEFT JOIN pg_namespace N ON (N.oid = C.relnamespace)
-- Match on schema as well as name: relation names are only unique within a
-- schema, so a name-only join duplicates rows and can report the tablespace
-- of a same-named relation in another schema.
LEFT JOIN pg_tables T ON (T.schemaname = N.nspname AND T.tablename = C.relname)
LEFT JOIN pg_indexes I ON (I.schemaname = N.nspname AND I.indexname = C.relname)
WHERE N.nspname NOT IN ('pg_catalog','pg_toast','information_schema')
AND COALESCE(T.tablespace, I.tablespace, '') = 'archive1'
ORDER BY pg_total_relation_size(C.oid) DESC LIMIT 100;

relation | size (data) | tablespace

------------------------------------------------------------+------------------------------+------------

public.table1 | 172 GB (148 GB data) | archive1

public.table2 | 171 GB (147 GB data) | archive1

public.table3 | 32 GB (32 GB data) | archive1

--SNIP--

Hope this helps you out, took some time to get it to work.

Jason R. Ralph

Linux All Day Everyday

Postgres Long Running Active Queries Send To Slack

Python Function Execute Subprocess With Timeout

CENTOS6 Postgres pg_upgrade 9 to 11 – In Place – Link – No Copy – Limited Disk Space

SINOPIA NPM allow connections to GITHUB as well as the NPM registry

PSQL Connect To AWS Redshift From Windows 10 PowerShell

Python Generator Find Files With Wildcard

Python3 Subprocess and Rsync Deadlock Strace Timeout

Amazon Redshift Long Running Query Alert to Slack

Nagios Python Plugin Check If File Is Stale

POSTGRES – Top 100 Tables In Tablespace