-
Notifications
You must be signed in to change notification settings - Fork 0
/
data_integration.py
48 lines (39 loc) · 2.36 KB
/
data_integration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import argparse
import importlib
from data_integration.dataset2class import dataset2class
def get_dataset_class(dataset):
    """Resolve the module path and class name registered for ``dataset``.

    Args:
        dataset: Key used to index ``dataset2class``.

    Returns:
        A ``(module_name, class_name)`` tuple. ``module_name`` is the
        registry entry's ``"submodule"`` value prefixed with
        ``data_integration.``; ``class_name`` is the entry's ``"class"``
        value.

    Note:
        No validation is performed here; whatever the subscript lookups
        raise for an unknown ``dataset`` (``KeyError`` if the registry is
        a plain dict) propagates to the caller.
    """
    entry = dataset2class[dataset]
    return f'data_integration.{entry["submodule"]}', entry['class']
def main():
    """Command-line entry point: parse arguments and run the requested steps.

    The dataset handler class is looked up through ``get_dataset_class``,
    imported dynamically, and instantiated with the input path, the output
    path and the worker count. Each step enabled by a flag is then invoked
    on that instance, always in this order: item conversion, user
    conversion, rating conversion, social conversion, URI mapping, data
    enrichment. With no step flag given, the handler is still constructed
    but no step is run.

    Unsupported ``--dataset`` values are rejected by argparse (usage
    message, exit status 2) instead of surfacing later as a ``KeyError``
    traceback from the registry lookup.
    """
    parser = argparse.ArgumentParser(
        description="Script for Data Integration between DBpedia and some standard Recommender System datasets."
    )
    # Restricting choices to the registry keys validates the name up front
    # and makes ``--help`` list the supported datasets.
    parser.add_argument('-d', '--dataset', type=str, required=True, choices=list(dataset2class),
                        help='Choose a supported RS dataset.')
    parser.add_argument('-i', '--input_path', type=str, required=True, help='Path where the dataset is located.')
    parser.add_argument('-o', '--output_path', type=str, required=True, help='Path where the processed dataset will be placed.')
    parser.add_argument('-ci', '--convert_item', action='store_true', help='Use this flag if you want to convert item data.')
    parser.add_argument('-cu', '--convert_user', action='store_true', help='Use this flag if you want to convert user data.')
    parser.add_argument('-cr', '--convert_rating', action='store_true', help='Use this flag if you want to convert rating data.')
    parser.add_argument('-cs', '--convert_social', action='store_true', help='Use this flag if you want to convert social links data.')
    parser.add_argument('-map', '--map_URIs', action='store_true', help='Use this flag if you want to map dataset items with DBpedia.')
    parser.add_argument('-enrich', '--enrich_data', action='store_true', help='Use this flag if you want to enrich dataset with DBpedia.')
    parser.add_argument('-w', '--n_workers', type=int, default=1, help='Choose the number of workers(threads) to be used for parallel queries.')
    args = parser.parse_args()

    # Import the handler lazily so only the selected dataset's module is loaded.
    module_name, class_name = get_dataset_class(args.dataset)
    dataset_cls = getattr(importlib.import_module(module_name), class_name)
    dataset = dataset_cls(args.input_path, args.output_path, n_workers=args.n_workers)

    # Steps run in a fixed order regardless of the order of flags on the
    # command line.
    if args.convert_item:
        dataset.convert_item_data()
    if args.convert_user:
        dataset.convert_user_data()
    if args.convert_rating:
        dataset.convert_rating_data()
    if args.convert_social:
        dataset.convert_social_data()
    if args.map_URIs:
        dataset.map_URIs()
    if args.enrich_data:
        dataset.enrich_data()
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()