pypi_scrape

get data from the Python Package Index (PyPI)
git clone git://popovic.xyz/pypi_scrape.git
Log | Files | Refs

commit 63864668b793f22fa56448864a45da17744a5d20
parent 584d9835c1e3b649920cc0bd08cd81928de688c1
Author: miksa234 <milutin@popovic.xyz>
Date:   Tue, 19 Apr 2022 11:51:30 +0200

see last fix

Diffstat:
M sort_data.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/sort_data.py b/sort_data.py @@ -9,7 +9,6 @@ import warnings warnings.simplefilter(action='ignore', category=FutureWarning) def main(): - path = './data/' file_names = sorted(os.listdir(path), key=lambda x: datetime.strptime(x, '%Y-%m.csv')) dframes = {} @@ -18,7 +17,7 @@ def main(): dframes[fname] = pd.read_csv(path + fname, sep='|', keep_default_na=False) cache_list = [['package', 'requirement']] - for i, fname in enumerate(file_names[:10]): + for i, fname in enumerate(file_names): print(fname, f'{round(i/len(file_names)*100, 1)}%', sep='\t') @@ -33,12 +32,13 @@ def main(): cache_list.append([package, requirement]) dframes[fname]['requirement'].iloc[j] = '' - if (index_found := np.where(np.array(cache_list, dtype='object')[:,1] == package)[0]).size != 0: + index_found = np.where(np.array(cache_list, dtype='object')[:,1] == package)[0] + if index_found.size != 0: for i_found in index_found: found_package, found_requirement = cache_list[i_found] dframes[fname].append({'package': found_package, 'requirement': found_requirement},\ ignore_index=True) - del cache_list[i_found] + cache_list = list(np.delete(cache_list, index_found, axis=0)) dframes[fname] = dframes[fname].drop_duplicates()