chmura/jupyter/UAM_Lab_1_reading_from_kinesis_stream.ipynb

10 KiB

Lab 1: Czytanie danych z Kinesis Data Streams

Przebieg ćwiczenia

  • Stwórz Data Stream
  • wygeneruj testowe dane do streama
  • odczytaj dane ze streama (ShardIterator)
  • zwróć uwagę na iteracje po shardach i iteratorach (per shard)
  • porównaj przeczytane dane z danymi wygenerowanymi (czytamy w dwóch iteracjach po 5 rekordów, czyli pierwsze 10 rekordów, licząc od TRIM_HORIZON)
  • sprawdź, jakie są inne opcje ustawienia punktu startowego odczytu w shardzie (parametr ShardIteratorType)
import boto3
from pprint import pprint

# Kinesis client shared by every following cell; region and credentials are
# whatever boto3 resolves from the environment.
kinesis_client = boto3.client("kinesis")

# Display the names of the streams returned by ListStreams.
kinesis_client.list_streams()["StreamNames"]
['cryptostock-dev-100603781557-jk-12345']
# Pick the first stream returned by ListStreams as the stream used in this lab.
stream_names = kinesis_client.list_streams()["StreamNames"]
if not stream_names:
    # Fail with an actionable message instead of a bare IndexError on [0].
    raise RuntimeError(
        "No Kinesis data streams found - create a Data Stream before running this cell."
    )
STREAM_NAME = stream_names[0]
STREAM_NAME
'cryptostock-dev-100603781557-jk-12345'
# Describe the selected stream; `response` is reused by the next cell to
# walk over the stream's shards.
response = kinesis_client.describe_stream(StreamName=STREAM_NAME)

# Print only the shard descriptions (hash key range, sequence range, id).
stream_shards = response["StreamDescription"]["Shards"]
pprint(stream_shards)
[{'HashKeyRange': {'EndingHashKey': '340282366920938463463374607431768211455',
                   'StartingHashKey': '0'},
  'SequenceNumberRange': {'StartingSequenceNumber': '49617445977150094507622122574044516561004852020651229186'},
  'ShardId': 'shardId-000000000000'}]
# Build a list of {'shard_id', 'shard_iterator'} entries, one per shard of the
# stream described in `response`.
shard_ids = []
stream_name = None
if response and 'StreamDescription' in response:
    stream_description = response['StreamDescription']
    stream_name = stream_description['StreamName']

    # Shard iterators are issued per shard, so request one for every shard.
    for shard in stream_description['Shards']:
        shard_id = shard['ShardId']
        # TRIM_HORIZON asks for an iterator at the start of the shard
        # (see the GetShardIterator documentation for the other types).
        shard_iterator = kinesis_client.get_shard_iterator(
            StreamName=stream_name,
            ShardId=shard_id,
            ShardIteratorType="TRIM_HORIZON",
        )

        # NOTE: `si` stays bound after the loop to the LAST shard's iterator;
        # the following get_records cell reads from it.
        si = shard_iterator["ShardIterator"]
        shard_ids.append({'shard_id': shard_id, 'shard_iterator': si})

shard_ids
[{'shard_id': 'shardId-000000000000',
  'shard_iterator': 'AAAAAAAAAAEfLA4f5f+lMhjNfHXIXsKxQeP3dg79sVKKRiT+843gRXwSQsYRXeMIS4KwdRUjPdChkE2ZZGYSG3DeghHZi41DXOE0pNSdFHnqkePkBVIX2cN/9rbedZTgX/WXfNaL+sMUfdbYV6f9iQEtTtRAYN3bXfk5jUwIBvcgB1mQDRzdT1Or150vbf3LSlLtC7XlkK7HNZoGM1t577jseZTyvJ4+yeBOV73DQnSFnL/EPQvVdm+lidZtaNe39NMak4bXx5AWmhwblwLPmXg/l2PMDx7Z'}]
# First read: fetch at most `limit` records starting at the shard iterator `si`.
limit = 5
response_get_rec = kinesis_client.get_records(
    ShardIterator=si,
    Limit=limit,
)

# Remember the follow-up iterator so the next cell can continue from here.
next_shard_iterator = response_get_rec['NextShardIterator']
pprint(response_get_rec["Records"])
[{'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 191000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510403, "symbol": "ETH_USD", "price": 360'
          b'.03, "amount": 0.646, "dollar_amount": 232.57938, "type": "buy",'
          b' "trans_id": 124289044}\n',
  'PartitionKey': 'ETH_USD',
  'SequenceNumber': '49617445977150094507622122578777461144796130256147709954'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 310000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510403, "symbol": "BTC_USD", "price": 107'
          b'80.83, "amount": 0.035, "dollar_amount": 377.32905, "type": "buy'
          b'", "trans_id": 124289043}\n',
  'PartitionKey': 'BTC_USD',
  'SequenceNumber': '49617445977150094507622122578778670070615744885322416130'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 428000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510404, "symbol": "ETH_USD", "price": 360'
          b'.12, "amount": 0.523, "dollar_amount": 188.34276, "type": "buy",'
          b' "trans_id": 124289045}\n',
  'PartitionKey': 'ETH_USD',
  'SequenceNumber': '49617445977150094507622122578779878996435359514497122306'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 545000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510405, "symbol": "BTC_USD", "price": 107'
          b'84.42, "amount": 0.25635676, "dollar_amount": 2764.65897, "type"'
          b': "buy", "trans_id": 124289050}\n',
  'PartitionKey': 'BTC_USD',
  'SequenceNumber': '49617445977150094507622122578781087922254974143671828482'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 663000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510407, "symbol": "BTC_USD", "price": 107'
          b'84.42, "amount": 0.23877038, "dollar_amount": 2575.000061, "type'
          b'": "buy", "trans_id": 124289051}\n',
  'PartitionKey': 'BTC_USD',
  'SequenceNumber': '49617445977150094507622122578782296848074588772846534658'}]
# Second read: show the iterator token saved from the previous read, then
# fetch the next batch of at most `limit` records from it.
print(next_shard_iterator)
response_get_rec = kinesis_client.get_records(
    ShardIterator=next_shard_iterator,
    Limit=limit,
)
pprint(response_get_rec["Records"])
AAAAAAAAAAGUetfZYhJAUbRmLdnxgHF2gXHQ+Yt8063YzfurEZ+Vdauri9LJ13JLrPqLIrBxeHRJ1GEBctxNJ4jYeB4Um/JNu4+2L5Jfa1Apl9s9y6f/5UMZlqIAFGvUPmW53Gj6MyauM9r7EWNBUBZCvrFQkHvC9fQwNYP3eyYm1xp4K9fcjBX90qUdnGmFU69bq+3BF5I7PXgPHitcwzJev6PqPLVny2SmhSHtnRF/Rogj00Xv+DtKo1/SBdVid3tyQ0e9tm4XrgttPfPIhNsJB3j57lpM
[{'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 812000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510409, "symbol": "BTC_USD", "price": 107'
          b'84.42, "amount": 1.01303547, "dollar_amount": 10924.99998, "type'
          b'": "buy", "trans_id": 124289054}\n',
  'PartitionKey': 'BTC_USD',
  'SequenceNumber': '49617445977150094507622122578783505773894203402021240834'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 930000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510410, "symbol": "BTC_USD", "price": 107'
          b'84.42, "amount": 0.26135077, "dollar_amount": 2818.516471, "type'
          b'": "buy", "trans_id": 124289055}\n',
  'PartitionKey': 'BTC_USD',
  'SequenceNumber': '49617445977150094507622122578784714699713818031195947010'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 26, 48000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510413, "symbol": "ETH_USD", "price": 360'
          b'.39, "amount": 5.55416701, "dollar_amount": 2001.666249, "type":'
          b' "buy", "trans_id": 124289059}\n',
  'PartitionKey': 'ETH_USD',
  'SequenceNumber': '49617445977150094507622122578785923625533432660370653186'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 26, 165000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510414, "symbol": "ETH_USD", "price": 360'
          b'.6, "amount": 13.855, "dollar_amount": 4996.113, "type": "buy", '
          b'"trans_id": 124289071}\n',
  'PartitionKey': 'ETH_USD',
  'SequenceNumber': '49617445977150094507622122578787132551353047358264836098'},
 {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 26, 282000, tzinfo=tzlocal()),
  'Data': b'{"transaction_ts": 1601510415, "symbol": "ETH_USD", "price": 360'
          b'.24, "amount": 6.32869733, "dollar_amount": 2279.849926, "type":'
          b' "sell", "trans_id": 124289072}\n',
  'PartitionKey': 'ETH_USD',
  'SequenceNumber': '49617445977150094507622122578788341477172661987439542274'}]