pypi_scrape

Get data from the Python Package Index (PyPI)
git clone git://popovic.xyz/pypi_scrape.git
Log | Files | Refs | LICENSE

get_dependecies.py (1715B)


      1 #!/usr/bin/env python3.6
      2 
      3 import pypi_xmlrpc
      4 import re
      5 import requests
      6 from datetime import datetime
      7 
      8 
      9 def main():
     10     url = 'https://pypi.org/pypi/{}/json'
     11     packages = pypi_xmlrpc.list_packages()
     12 
     13     path = './data/'
     14     files = {}
     15 
     16     for i, package in enumerate(packages[:100]):
     17         try:
     18             json = requests.get(url.format(package)).json()
     19         except:
     20             print("ERROR")
     21             continue
     22 
     23         catch = True; j = 0
     24         while catch:
     25             try:
     26                 timestr = list(json['releases'].items())[j][1][0]['upload_time']
     27                 release = datetime.strptime(timestr, "%Y-%m-%dT%H:%M:%S")
     28                 catch = False
     29             except:
     30                 j += 1
     31                 if j == 10:
     32                     print("NO TIME")
     33                     break
     34         if j == 10:
     35             continue
     36 
     37         try:
     38             files[f'{release.year}-{release.month}'].closed
     39         except:
     40             files[f'{release.year}-{release.month}'] = open(path + f'{release.year}-{release.month}.csv', "w")
     41             files[f'{release.year}-{release.month}'].write('package|requirement\n')
     42 
     43         try:
     44             needs = list(dict.fromkeys([re.sub(r' (.*)', '', d) for d in json['info']['requires_dist']]))
     45         except:
     46             files[f'{release.year}-{release.month}'].write(f'{package}|\n')  # create standalone node
     47             continue
     48 
     49         for req in needs:
     50             if package == req: # avoid self loops
     51                 continue
     52             else:
     53                 files[f'{release.year}-{release.month}'].write(f'{package}|{req}\n')
     54         print(package)
     55 
     56     for file in files:
     57         files[file].close()
     58 
     59 
     60 if __name__ == '__main__':
     61     main()