File size: 2,606 Bytes
00621f5
 
 
 
53332ed
 
00621f5
53332ed
 
00621f5
53332ed
 
00621f5
 
53332ed
 
 
 
 
 
c219cdc
00621f5
 
 
 
53332ed
00621f5
 
 
 
53332ed
 
00621f5
 
53332ed
00621f5
c219cdc
 
 
 
 
 
00621f5
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline


class UrlClassifier:
    """Zero-shot classifier that scores a URL string against shopping categories.

    The text is passed unchanged to a Hugging Face ``zero-shot-classification``
    pipeline together with the candidate labels; nothing is fetched from the
    URL by this class itself.
    """

    # Model identifier handed to ``transformers.pipeline`` when none is given.
    DEFAULT_MODEL = "MoritzLaurer/DeBERTa-v3-large-mnli-fever-anli-ling-wanli"

    # Category taxonomy used when the caller does not supply its own labels.
    DEFAULT_LABELS = (
        "Apps",
        "Arts and Entertainment",
        "Automotive Ownership",
        "Automotive Products",
        "Beauty Services",
        "Business and Industrial",
        "Clothing and Accessories",
        "Collectables and Antiques",
        "Consumer Electronics",
        "Edible Consumer Packaged Goods",
        "Education and Careers",
        "Family and Parenting",
        "Finance and Insurance",
        "Food and Beverage Services",
        "Furniture",
        "Gifts and Holiday Items",
        "Hardware Supplies",
        "Health and Medical Services",
        "Hobbies and Interests",
        "Home and Garden Services",
        "Legal Services",
        "Life Events",
        "Logistics and Delivery",
        "Non-Profits",
        "Office Equipment and Supplies",
        "Pet Services",
        "Pharmaceuticals",
        "Real Estate",
        "Recreation and Fitness Activities",
        "Software",
        "Sporting Goods",
        "Travel and Tourism",
        "Web Services",
    )

    def __init__(self, model_name: str = DEFAULT_MODEL, candidate_labels=None):
        """Build the zero-shot pipeline and store the label set.

        Args:
            model_name: Model identifier passed to ``transformers.pipeline``.
                Defaults to ``DEFAULT_MODEL``.
            candidate_labels: Optional iterable of label strings to score
                against. When ``None``, ``DEFAULT_LABELS`` is used.
        """
        self.pipeline = pipeline("zero-shot-classification", model=model_name)
        if candidate_labels is None:
            candidate_labels = self.DEFAULT_LABELS
        # Keep a per-instance list so later mutation never touches the shared
        # class-level default or the caller's own sequence.
        self.candidate_labels = list(candidate_labels)

    def predict(self, input_text: str) -> dict:
        """Score ``input_text`` against every candidate label.

        Args:
            input_text: Text to classify (the demo passes a URL string).

        Returns:
            A dict mapping each label to its score. Entries are inserted in
            ascending score order, with ties ordered by label.
        """
        pred = self.pipeline(input_text, self.candidate_labels)

        # Sorting (score, label) tuples orders by score first, then by label.
        ranked = sorted(zip(pred["scores"], pred["labels"]))
        return {label: score for score, label in ranked}


def main():
    """Build the Gradio interface around ``UrlClassifier`` and launch it."""
    model = UrlClassifier()
    iface = gr.Interface(
        fn=model.predict,
        # The legacy ``gr.inputs`` namespace is deprecated in Gradio 3.x and
        # removed in 4.x; the top-level component takes the same keywords.
        inputs=gr.Textbox(
            lines=3,
            placeholder="Input a shopping website URL",
            label="Input URL",
        ),
        outputs="label",
        title="Ecommerce URL Classification",
        examples=[
            "https://www.homedepot.com/p/MINKA-AIRE-Concept-II-44-in-Integrated-LED-Indoor-White-Ceiling-Fan-with-Light-with-Remote-Control-F518L-WH/310172695",
            "https://www.allsaints.com/us/men/t-shirts/pinup-crew-t-shirt/USMG127X-162.html",
            "https://www.ikea.com/us/en/cat/multifunctional-tables-57537/",
            "https://www.rei.com/adventures/all-trips?cm_mmc=email_com_gm-_-20230102_ADV_AdventuresJanuary-_-010223-_-CTA1_NP_Adventures_23_00005&ev36=555141764&rmid=20230102_ADV_AdventuresJanuary&rrid=932052075&e",
            "https://www.dsw.com/en/us/product/adidas-fluid-flow-2.0-running-shoe---womens/512360",
            "https://www.homedepot.com/b/Tools-Power-Tools-Saws-Scroll-Saws/N-5yc1vZc67a?sortorder=desc&sortby=price",
        ],
    )

    iface.launch()


# Start the demo only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()