$ cd ~
$ pip3 install happybase
$ python -c 'import happybase'
Note that a total of ten (10) services can be observed eventually.
$ cd ~
$ hbase thrift start -p 9090 &
Find the PID of the Thrift service, and then stop the service using that PID:
$ ps -aux | grep 9090
hduser 9441 1.5 3.1 2928716 152960 pts/2 Sl 07:46 0:05
$ kill 9441
Or,
$ jps
6849 NodeManager
9441 ThriftServer
7233 QuorumPeerMain
8321 HMaster
6227 DataNode
6451 SecondaryNameNode
6053 NameNode
9735 Jps
8539 HRegionServer
6700 ResourceManager
7613 Kafka
$ kill 9441
$ python
When you want to exit the Python command-line interpreter, type:
>>> exit()
>>> import happybase
>>> connection = happybase.Connection(port=9090)
>>> print(connection.tables())
>>> t = connection.table('linkshare')
>>> rs = t.scan()
>>> for r in rs:
...     print(r)
$ python
>>> connection.create_table('mytable', {'cf1':dict()})
If you encounter the following exception,
thriftpy2.transport.base.TTransportException: TTransportException(type=4, message='TSocket read 0 bytes')
Re-run the statement that creates the connection to localhost, and then re-run the statement that creates the table. Afterwards, list all the available HBase tables to confirm that the table has been created successfully.
>>> connection.create_table('mytable2', {'cf1':dict(max_versions=10),'cf2':dict(max_versions=2, block_cache_enabled=False), 'cf3':dict()})
List all the available HBase tables to confirm that the table has been successfully created.
>>> connection2 = happybase.Connection(host='localhost', port=9090, table_prefix='prj_pfx')
>>> connection2.create_table('mytable3', {'cf1':dict(max_versions=5),'cf2':dict(max_versions=2)})
List all the available HBase tables to confirm that the table has been successfully created.
$ cd ~/hbase
$ bin/hbase shell
hbase(main):001:0> list
hbase(main):002:0> describe 'mytable'
hbase(main):003:0> describe 'mytable2'
hbase(main):004:0> describe 'prj_pfx_mytable3'
>>> c = happybase.Connection(host='localhost', port=9090)
>>> t = c.table('mytable') # get a Table instance
>>> t.put('k1', {'cf1:c1':'v1'})
>>> t.put('k1', {'cf1:c2':'v2'})
>>> t.put('k1', {'cf1:c3':'v3'})
>>> t.put('k2', {'cf1:c1':'vv1', 'cf1:c2':'vv2', 'cf1:c3':'vv3'})
>>> c = happybase.Connection(port=9090)
>>> t3 = c.table('prj_pfx_mytable3')
>>> t3.put('rk1', {'cf1:country':'UK'})
>>> t3.put('rk1', {'cf1:city':'London'})
>>> t3.put('rk1', {'cf1:industry':'Manufacturing'})
>>> t3.put('rk1', {'cf2:department':'Production', 'cf2:title':'Senior Manager'})
>>> t3.put('rk2', {'cf1:country':'Malaysia', 'cf1:city':'KL', 'cf1:industry':'Software Development', 'cf2:department':'QA', 'cf2:title':'Test Engineer'})
If you encounter the error socket.error: [Errno 32] Broken pipe, make sure that you have obtained the Table instance first. If the error persists, kill the Thrift process and restart it.
>>> row = t3.row(b'rk1')
>>> print(row[b'cf1:industry'])
>>> print(row[b'cf1:country'])
>>> print(row[b'cf1:city'])
>>> print(row[b'cf2:department'])
>>> print(row[b'cf2:title'])
>>> row = t3.row(b'rk2')
>>> print(row[b'cf1:country'])
>>> print(row[b'cf1:city'])
>>> print(row[b'cf1:industry'])
>>> print(row[b'cf2:department'])
>>> print(row[b'cf2:title'])
>>> print("data for row-key 'rk2':\n", row)
>>> m_rows = t3.rows(['rk1', 'rk2'])
>>> for r in m_rows:
...     print("data in current row: ", r)
>>> for r in m_rows:
...     r_as_dict = dict(r[1])
...     print("\nr: ", r)
...     print("\tAs dict: ", r_as_dict)
>>> city_list = []
>>> for r in m_rows:
...     r_as_dict = dict(r[1])
...     city_list.append(r_as_dict[b'cf1:city'])
...
>>> print(city_list)
>>> c = happybase.Connection(port=9090)
>>> t3 = c.table('prj_pfx_mytable3')
>>> row = t3.row(b'rk1', columns=[b'cf1:country', b'cf2:title'])
>>> print(row)
>>> print(row[b'cf1:country'])
>>> print(row[b'cf2:title'])
>>> c = happybase.Connection(port=9090)
>>> t3 = c.table('prj_pfx_mytable3')
>>> row = t3.row(b'rk1', columns=[b'cf1'])
>>> print(row)
>>> c = happybase.Connection(port=9090)
>>> t3 = c.table('prj_pfx_mytable3')
>>> row = t3.row(b'rk1', columns=[b'cf1:country'], include_timestamp=True)
>>> value, timestamp = row[b'cf1:country']
>>> print(value)
>>> print(timestamp)
>>> cells = t3.cells(b'rk1', b'cf1:country', versions=3, include_timestamp=True)
>>> for value, timestamp in cells:
...     print("Cell data at {}:{}".format(timestamp, value))
>>> for key, data in t3.scan():
...     print("\nCompany record key: ", key.decode())
...     print("\tCompany info:")
...     print("\t\tLocation: {}, {}".format(data[b'cf1:city'].decode(), data[b'cf1:country'].decode()))
...     print("\t\tIndustry: {}".format(data[b'cf1:industry'].decode()))
...     print("\tContact person info:")
...     print("\t\tTitle & Department: {}, {}".format(data[b'cf2:title'].decode(), data[b'cf2:department'].decode()))
...
>>> for key, data in t3.scan(row_start=b'rk2'):
...     print(key, data)
>>> for key, data in t3.scan(row_stop=b'rk2'):
...     print(key, data)
>>> for key, data in t3.scan(row_start=b'rk1', row_stop=b'rk3'):
...     print(key, data)
...
Inspect the table contents from the HBase shell:
hbase> scan 'prj_pfx_mytable3'
>>> t3.put('kr4', {'cf1:country':'Australia', 'cf1:city':'Sydney', 'cf1:industry':'Tourism', 'cf2:department':'Human Resource', 'cf2:title':'Manager'})
>>> for key, data in t3.scan(row_prefix=b'k'):
...     print(key, data)
...
>>> t3.delete(b'rk2')
>>> t3.delete(b'rk1', columns=[b'cf1:city', b'cf2:title'])