pypi_scrape

Get data from the Python Package Index (PyPI)
git clone git://popovic.xyz/pypi_scrape.git
Log | Files | Refs

commit a259cacf0fe5b3af1e177af50094e1d02f73ab79
parent caec326be90d533496ca764032205a96c78371c2
Author: miksa234 <milutin@popovic.xyz>
Date:   Sun, 17 Apr 2022 21:56:18 +0200

initialize time

Diffstat:
M get_dependecies.py | 30 +++++++++++++++++++++++-------
1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/get_dependecies.py b/get_dependecies.py
@@ -3,37 +3,53 @@ import pypi_xmlrpc
 import re
 import requests
+from datetime import datetime

-def main()
+def main():
     url = 'https://pypi.org/pypi/{}/json'
     packages = pypi_xmlrpc.list_packages()

-    f = open('test.csv', 'w')
-    f.write("package|requirement\n")
+    path = './data/'
+    files = {}
     for i, package in enumerate(packages):
-
         try:
             json = requests.get(url.format(package)).json()
         except:
             print("ERROR")
             continue

+        catch = True; j = 0
+        while catch:
+            try:
+                timestr = list(json['releases'].items())[j][1][0]['upload_time']
+                release = datetime.strptime(timestr, "%Y-%m-%dT%H:%M:%S")
+                catch = False
+            except:
+                j += 1
+        try:
+            files[f'{release.year}-{release.month}'].closed
+        except:
+            files[f'{release.year}-{release.month}'] = open(path + f'{release.year}-{release.month}.csv', "w")
+            files[f'{release.year}-{release.month}'].write('package|requirement\n')
+
         try:
             needs = list(dict.fromkeys([re.sub(r' (.*)', '', d) for d in json['info']['requires_dist']]))
         except:
-            f.write(f'{package}|\n') # create standalone node
+            files[f'{release.year}-{release.month}'].write(f'{package}|\n') # create standalone node
             continue

         for req in needs:
             if package == req: # avoid self loops
                 continue
             else:
-                f.write(f'{package}|{req}\n')
+                files[f'{release.year}-{release.month}'].write(f'{package}|{req}\n')
         print(package)

-    f.close()
+    for file in files:
+        files[file].close()
+

 if __name__ == '__main__':
     main()