10 KiB
10 KiB
Lab 1: Czytanie z Kinesis Data Streams
Przebieg ćwiczenia
- Stwórz Data Stream
- wygeneruj testowe dane do streama
- odczytaj dane ze streama (ShardIterator)
- zwróć uwagę na iteracje po shardach i iteratorach (per shard)
- porównaj przeczytane dane z danymi wygenerowanymi (czytamy w dwóch iteracjach po 5 rekordów, czyli pierwsze 10 rekordów od TRIM_HORIZON)
- sprawdź, jakie są inne opcje ustawienia punktu startowego w shardzie (parametr ShardIteratorType)
import boto3
from pprint import pprint
# Low-level Kinesis client, created without explicit region or credential arguments.
kinesis_client = boto3.client(service_name='kinesis')
# Display the stream names returned by ListStreams.
stream_listing = kinesis_client.list_streams()
stream_listing["StreamNames"]
['cryptostock-dev-100603781557-jk-12345']
# Use the first stream returned by ListStreams as the stream for this lab.
available_streams = kinesis_client.list_streams()["StreamNames"]
if not available_streams:
    # Fail with an explicit message instead of a bare "list index out of range";
    # the exception type stays IndexError, as plain indexing would raise.
    raise IndexError(
        "list_streams() returned no streams - create a Data Stream first"
    )
STREAM_NAME = available_streams[0]
STREAM_NAME
'cryptostock-dev-100603781557-jk-12345'
# Fetch the stream description; `response` is reused below to enumerate the shards.
response = kinesis_client.describe_stream(StreamName=STREAM_NAME)
# Pretty-print the shard list contained in the description.
stream_shards = response["StreamDescription"]["Shards"]
pprint(stream_shards)
[{'HashKeyRange': {'EndingHashKey': '340282366920938463463374607431768211455', 'StartingHashKey': '0'}, 'SequenceNumberRange': {'StartingSequenceNumber': '49617445977150094507622122574044516561004852020651229186'}, 'ShardId': 'shardId-000000000000'}]
# Build a list of {'shard_id', 'shard_iterator'} entries, one per shard of the
# described stream, each iterator positioned at TRIM_HORIZON.
shard_ids = []
stream_name = None
if response and 'StreamDescription' in response:
    stream_description = response['StreamDescription']
    stream_name = stream_description['StreamName']
    # reading all shards (getting shard iterators)
    # get_shard_iterator takes a ShardId, so one iterator is requested per shard.
    for shard in stream_description['Shards']:
        shard_id = shard['ShardId']
        shard_iterator = kinesis_client.get_shard_iterator(
            StreamName=stream_name,
            ShardId=shard_id,
            ShardIteratorType="TRIM_HORIZON",
        )
        si = shard_iterator["ShardIterator"]
        shard_ids.append({'shard_id': shard_id, 'shard_iterator': si})
# NOTE: after the loop `si` holds the iterator of the LAST shard only; code that
# reads via `si` therefore covers a single shard. Iterate over `shard_ids` to
# read from every shard.
shard_ids
[{'shard_id': 'shardId-000000000000', 'shard_iterator': 'AAAAAAAAAAEfLA4f5f+lMhjNfHXIXsKxQeP3dg79sVKKRiT+843gRXwSQsYRXeMIS4KwdRUjPdChkE2ZZGYSG3DeghHZi41DXOE0pNSdFHnqkePkBVIX2cN/9rbedZTgX/WXfNaL+sMUfdbYV6f9iQEtTtRAYN3bXfk5jUwIBvcgB1mQDRzdT1Or150vbf3LSlLtC7XlkK7HNZoGM1t577jseZTyvJ4+yeBOV73DQnSFnL/EPQvVdm+lidZtaNe39NMak4bXx5AWmhwblwLPmXg/l2PMDx7Z'}]
# First read: request at most `limit` records using the shard iterator `si`.
limit = 5
response_get_rec = kinesis_client.get_records(
    ShardIterator=si,
    Limit=limit,
)
# Show the fetched batch, then keep the iterator returned for the follow-up read.
pprint(response_get_rec["Records"])
next_shard_iterator = response_get_rec['NextShardIterator']
[{'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 191000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510403, "symbol": "ETH_USD", "price": 360' b'.03, "amount": 0.646, "dollar_amount": 232.57938, "type": "buy",' b' "trans_id": 124289044}\n', 'PartitionKey': 'ETH_USD', 'SequenceNumber': '49617445977150094507622122578777461144796130256147709954'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 310000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510403, "symbol": "BTC_USD", "price": 107' b'80.83, "amount": 0.035, "dollar_amount": 377.32905, "type": "buy' b'", "trans_id": 124289043}\n', 'PartitionKey': 'BTC_USD', 'SequenceNumber': '49617445977150094507622122578778670070615744885322416130'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 428000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510404, "symbol": "ETH_USD", "price": 360' b'.12, "amount": 0.523, "dollar_amount": 188.34276, "type": "buy",' b' "trans_id": 124289045}\n', 'PartitionKey': 'ETH_USD', 'SequenceNumber': '49617445977150094507622122578779878996435359514497122306'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 545000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510405, "symbol": "BTC_USD", "price": 107' b'84.42, "amount": 0.25635676, "dollar_amount": 2764.65897, "type"' b': "buy", "trans_id": 124289050}\n', 'PartitionKey': 'BTC_USD', 'SequenceNumber': '49617445977150094507622122578781087922254974143671828482'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 663000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510407, "symbol": "BTC_USD", "price": 107' b'84.42, "amount": 0.23877038, "dollar_amount": 2575.000061, "type' b'": "buy", "trans_id": 124289051}\n', 'PartitionKey': 'BTC_USD', 'SequenceNumber': '49617445977150094507622122578782296848074588772846534658'}]
# Second read: print the iterator returned by the previous get_records call,
# then use it to fetch and display the next batch of at most `limit` records.
print(next_shard_iterator)
response_get_rec = kinesis_client.get_records(
    ShardIterator=next_shard_iterator,
    Limit=limit,
)
second_batch = response_get_rec["Records"]
pprint(second_batch)
AAAAAAAAAAGUetfZYhJAUbRmLdnxgHF2gXHQ+Yt8063YzfurEZ+Vdauri9LJ13JLrPqLIrBxeHRJ1GEBctxNJ4jYeB4Um/JNu4+2L5Jfa1Apl9s9y6f/5UMZlqIAFGvUPmW53Gj6MyauM9r7EWNBUBZCvrFQkHvC9fQwNYP3eyYm1xp4K9fcjBX90qUdnGmFU69bq+3BF5I7PXgPHitcwzJev6PqPLVny2SmhSHtnRF/Rogj00Xv+DtKo1/SBdVid3tyQ0e9tm4XrgttPfPIhNsJB3j57lpM [{'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 812000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510409, "symbol": "BTC_USD", "price": 107' b'84.42, "amount": 1.01303547, "dollar_amount": 10924.99998, "type' b'": "buy", "trans_id": 124289054}\n', 'PartitionKey': 'BTC_USD', 'SequenceNumber': '49617445977150094507622122578783505773894203402021240834'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 25, 930000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510410, "symbol": "BTC_USD", "price": 107' b'84.42, "amount": 0.26135077, "dollar_amount": 2818.516471, "type' b'": "buy", "trans_id": 124289055}\n', 'PartitionKey': 'BTC_USD', 'SequenceNumber': '49617445977150094507622122578784714699713818031195947010'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 26, 48000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510413, "symbol": "ETH_USD", "price": 360' b'.39, "amount": 5.55416701, "dollar_amount": 2001.666249, "type":' b' "buy", "trans_id": 124289059}\n', 'PartitionKey': 'ETH_USD', 'SequenceNumber': '49617445977150094507622122578785923625533432660370653186'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 26, 165000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510414, "symbol": "ETH_USD", "price": 360' b'.6, "amount": 13.855, "dollar_amount": 4996.113, "type": "buy", ' b'"trans_id": 124289071}\n', 'PartitionKey': 'ETH_USD', 'SequenceNumber': '49617445977150094507622122578787132551353047358264836098'}, {'ApproximateArrivalTimestamp': datetime.datetime(2021, 4, 18, 14, 18, 26, 282000, tzinfo=tzlocal()), 'Data': b'{"transaction_ts": 1601510415, "symbol": 
"ETH_USD", "price": 360' b'.24, "amount": 6.32869733, "dollar_amount": 2279.849926, "type":' b' "sell", "trans_id": 124289072}\n', 'PartitionKey': 'ETH_USD', 'SequenceNumber': '49617445977150094507622122578788341477172661987439542274'}]