forked from danslimmon/awsbill2graphite
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathprint_all_csvs.py
executable file
·65 lines (55 loc) · 2.21 KB
/
print_all_csvs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#!/usr/bin/env python
import gzip
import json
import os
import shutil
import sys
import tempfile
import boto3
def all_s3_primary_manifests(objects):
    """Return the S3 object(s) corresponding to all primary manifests.

    `objects` should be an iterable of S3 objects (anything exposing a
    string `key` attribute).

    Manifests are the objects whose key ends with ``Manifest.json``. They are
    ordered by key length, and the primary manifests are taken to be the
    leading run of that ordering whose keys are no deeper (contain no more
    ``/`` separators) than the shortest key.

    Returns a list, which is empty when `objects` contains no manifests.
    """
    # sorted() is stable, so manifests with equal key length keep input order.
    manifests = sorted(
        (o for o in objects if o.key.endswith("Manifest.json")),
        key=lambda o: len(o.key),
    )
    if not manifests:
        return []

    # The primary manifest(s) will be the one(s) with the shortest path length
    n_slash = manifests[0].key.count("/")
    primaries = []
    for manifest in manifests:
        if manifest.key.count("/") > n_slash:
            # First manifest nested deeper than the shortest one: everything
            # from here on is treated as a non-primary manifest.
            break
        primaries.append(manifest)
    return primaries
def print_all_from_s3(s3_path, tempdir, region_name):
    """Output all hourly billing reports from the given S3 path to stdout.

    `s3_path` is expected to look like ``s3://bucket/...``; only the bucket
    name (the third ``/``-separated component) is used, and every object in
    that bucket is scanned for primary manifests.

    `tempdir` is a directory in which each gzipped CSV is stored while it is
    being decompressed; each file is removed again as soon as it has been
    processed (or processing it has failed).

    `region_name` is passed through to ``boto3.resource``.

    The CSV header line (starting with ``identity/LineItemId,``) is emitted
    only if nothing has been written yet, so the concatenated output carries
    a single header.
    """
    s3 = boto3.resource("s3", region_name=region_name)
    bucket = s3.Bucket(s3_path.split("/")[2])
    primaries = all_s3_primary_manifests(bucket.objects.all())

    # Parse each primary manifest to get the keys of its billing CSVs
    s3_csvs = []
    for pri in primaries:
        manifest = json.loads(pri.get()["Body"].read())
        s3_csvs.extend(manifest["reportKeys"])

    # The reports are handled as raw bytes from download to output. On
    # Python 3 the byte stream behind stdout is `sys.stdout.buffer`; where
    # that attribute is absent, stdout itself is used.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    header_prefix = b"identity/LineItemId,"

    # Download each billing CSV to a temp directory and decompress
    header_written = False
    for s3_csv in s3_csvs:
        local_path = os.path.join(tempdir, s3_csv.split("/")[-1])
        try:
            obj = [o for o in bucket.objects.filter(Prefix=s3_csv)][0]
            # Binary mode: the payload is gzip data, not text.
            with open(local_path, "wb") as local_file:
                local_file.write(obj.get()["Body"].read())
            with gzip.open(local_path, "rb") as f:
                for line in f:
                    if header_written and line.startswith(header_prefix):
                        continue
                    out.write(line)
                    header_written = True
        finally:
            # Remove these files as we finish with them to save on disk
            # space, including when the download or decompression failed.
            if os.path.exists(local_path):
                os.unlink(local_path)
    out.flush()
if __name__ == "__main__":
    # os.getenv returns None (not '') when the variable is unset, so test for
    # falsiness to apply the default for both "unset" and "set but empty".
    region_name = os.getenv("REGION_NAME") or 'us-west-1'

    # Fail with a clear message instead of an AttributeError on None.split().
    report_path = os.getenv("AWSBILL_REPORT_PATH")
    if not report_path:
        sys.exit("AWSBILL_REPORT_PATH must be set to the S3 path of the "
                 "billing reports")

    tempdir = tempfile.mkdtemp(".awsbill")
    try:
        print_all_from_s3(report_path, tempdir, region_name)
    finally:
        # Always clean up the scratch directory, even if the run failed.
        shutil.rmtree(tempdir)