From 31edffb4c5c0f2847f4a4da68f1b570b4b244d45 Mon Sep 17 00:00:00 2001
From: Mustafa Gezen
Date: Sun, 7 May 2023 03:44:56 +0200
Subject: [PATCH] Add osv-gcs-exporter and fix before/after for OSV API (#6)

---
 apollo/osv-gcs-exporter/__main__.py      | 96 ++++++++++++++++++++++++
 apollo/osv-gcs-exporter/requirements.txt |  1 +
 apollo/server/routes/api_osv.py          |  6 +-
 3 files changed, 101 insertions(+), 2 deletions(-)
 create mode 100644 apollo/osv-gcs-exporter/__main__.py
 create mode 100644 apollo/osv-gcs-exporter/requirements.txt

diff --git a/apollo/osv-gcs-exporter/__main__.py b/apollo/osv-gcs-exporter/__main__.py
new file mode 100644
index 0000000..45fc2ed
--- /dev/null
+++ b/apollo/osv-gcs-exporter/__main__.py
@@ -0,0 +1,96 @@
+"""
+Export OSV data to GCS.
+
+Individual JSON files are exported to the following path:
+    gs://{bucket}/{advisory_id}.json
+"""
+import argparse
+import json
+from datetime import datetime, timedelta
+
+import requests
+from google.cloud import storage
+
+
+def main(args):
+    """
+    Page through the Apollo OSV API and upload each advisory to GCS.
+
+    Advisories whose object already exists in the bucket are skipped, so
+    re-running the exporter does not overwrite previously exported files.
+
+    Args:
+        args: Parsed command line arguments. ``args.bucket`` names the
+            destination bucket; when ``args.nightly`` is set, an ``after``
+            cutoff of 26 hours ago is sent to the API.
+
+    Raises:
+        requests.HTTPError: If the API responds with an error status.
+    """
+    # Connect to GCS
+    client = storage.Client()
+    bucket = client.get_bucket(args.bucket)
+
+    time_after = None
+    if args.nightly:
+        # NOTE(review): naive local time - confirm the API interprets
+        # "after" in the same timezone as the host running the export.
+        time_after = (datetime.now() - timedelta(hours=26)).isoformat("T")
+
+    page = 1
+    while True:
+        r = requests.get(
+            "https://apollo.build.resf.org/api/v3/osv",
+            params={
+                "page": page,
+                "limit": 100,
+                # requests omits parameters whose value is None
+                "after": time_after,
+            },
+            timeout=60,
+        )
+        r.raise_for_status()
+        advisories = r.json()["advisories"]
+        # Stop at the first empty page
+        if not advisories:
+            break
+
+        for advisory in advisories:
+            file_name = f"{advisory['id']}.json"
+            blob = bucket.blob(file_name)
+
+            # Skip if file already exists
+            if blob.exists():
+                print(f"Skipping {file_name} (already exists)")
+                continue
+
+            # Serialize only what is actually uploaded, and label it as
+            # JSON instead of the text/plain default
+            blob.upload_from_string(
+                json.dumps(advisory),
+                content_type="application/json",
+            )
+            print(f"Uploaded {file_name}")
+
+        page += 1
+
+    print("Done!")
+
+
+if __name__ == "__main__":
+    # Parse arguments
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--bucket",
+        help="GCS bucket to export to",
+        required=True,
+    )
+    # Flag for nightly job
+    parser.add_argument(
+        "--nightly",
+        help="Run as nightly job",
+        action="store_true",
+    )
+
+    # Send args to main
+    main(parser.parse_args())
diff --git a/apollo/osv-gcs-exporter/requirements.txt b/apollo/osv-gcs-exporter/requirements.txt
new file mode 100644
index 0000000..d265caf
--- /dev/null
+++ b/apollo/osv-gcs-exporter/requirements.txt
@@ -0,0 +1 @@
+google-cloud-storage==2.8.0
\ No newline at end of file
diff --git a/apollo/server/routes/api_osv.py b/apollo/server/routes/api_osv.py
index 950b6ab..54b80e6 100644
--- a/apollo/server/routes/api_osv.py
+++ b/apollo/server/routes/api_osv.py
@@ -1,3 +1,5 @@
+import datetime
+
 from typing import TypeVar, Generic, Optional
 
 from fastapi import APIRouter, Depends
@@ -236,8 +238,8 @@ def to_osv_advisory(ui_url: str, advisory: Advisory) -> OSVAdvisory:
 async def get_advisories_osv(
     params: Params = Depends(),
     product: Optional[str] = None,
-    before: Optional[str] = None,
-    after: Optional[str] = None,
+    before: Optional[datetime.datetime] = None,
+    after: Optional[datetime.datetime] = None,
     cve: Optional[str] = None,
     synopsis: Optional[str] = None,
     keyword: Optional[str] = None,