--- tags: - setfit - sentence-transformers - text-classification - generated_from_setfit_trainer widget: - text: '{"@id": "hub:69f82e716acd4d5fae6e985152e8a192", "@type": "print:PrintRecord", "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, "print:hasColourDetails": "4 CP, UV Fade resistant inks", "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-03-14"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": 31180, "print:hasCustomerName": "Scotts Miracle-Gro(SMG-POP)", "print:hasCutting": "Trim to size", "print:hasElementID": 3175875, "print:hasElementTitle": "W198251 SMG FY24 Lowes BOAS POP 17.5 x 3.85", "print:hasFinishedQuantity": 9600, "print:hasFinishedSizeHeight": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "3.85"}, "print:hasFinishedSizeWidth": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "17.5"}, "print:hasFlatSizeHeight": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "3.85"}, "print:hasFlatSizeWidth": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "17.5"}, "print:hasFscPaperBeenSpecified": "No", "print:hasInternalID": "77ea840f-5190-4464-b5ff-f70a2c9904a4", "print:hasMaterialCategory": "Plastic", "print:hasMaterialDescription": "20 mil white styrene", "print:hasMaterialRecycledPercentage": "0%", "print:hasMaterialThicknessOrWeight": 20, "print:hasMaterialType": "Polystyrene", "print:hasMaterialUnitOfMeasure": "Millimetres (mm)", "print:hasNumberOfVersions": 1, "print:hasPackingRequirements": "Bundle in sets of 4, bulk pack and ship to Temecula", "print:hasPermutationID": "69f82e71-6acd-4d5f-ae6e-985152e8a192", "print:hasPrice": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "6144.0"}, "print:hasPrintedSides": "Single sided", "print:hasProductCategory": "Indoor/Outdoor Signage", "print:hasProofType": "PDF digital proof", "print:hasQuantity": 
9600, "print:hasRecycledContentBeenOffered": "No", "print:hasSendToDetails": "karen.wessel@hhglobal.com", "print:hasSupplierName": "M&M Displays Inc-HHG Strategic Partner(M&M Displays Inc - 33682 - HHGSP - US Only)", "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches (in)"}' - text: '{"@id": "hub:dbe34397419449cd99154d8ce74f83d4", "@type": "print:PrintRecord", "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-04-30"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": 31736, "print:hasCustomerName": "AutoZone(AutoZone)", "print:hasCutting": "Trim to size", "print:hasElementID": 3262919, "print:hasElementTitle": "6x4 APCs", "print:hasFinishedQuantity": 6396, "print:hasFinishedSizeHeight": 4, "print:hasFinishedSizeWidth": 6, "print:hasFscPaperBeenSpecified": "No", "print:hasInternalID": "1ce94b3f-83a2-42c3-90c8-9eeb08342d1f", "print:hasMaterialCategory": "Other", "print:hasMaterialDescription": "APCs", "print:hasMaterialType": "Other", "print:hasNumberOfVersions": 551024, "print:hasPermutationID": "dbe34397-4194-49cd-9915-4d8ce74f83d4", "print:hasPrice": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "95878.18"}, "print:hasPrintedSides": "Single sided", "print:hasProofType": "PDF digital proof", "print:hasQuantity": 6396, "print:hasRecycledContentBeenOffered": "N/A", "print:hasSupplierName": "Earth Thebault/Mittera New Jersey(Mittera Group, Inc. 
- 47568 - HHGSP - ISR)", "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches (in)"}' - text: '{"@id": "hub:d7ae357afcee48c5bc042a2270c264bb", "@type": "print:PrintRecord", "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, "print:hasAdditionalInformation": "Color Wall Accents (deep orange, bright green, sky blue, midtone orange); branded purpose statement, 8 versions", "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-05-13"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": 38262, "print:hasCustomerName": "Architecture Products Group (APG)(Architecture Products Group (APG) (USA))", "print:hasCutting": "Trim to size", "print:hasElementID": 3284126, "print:hasElementTitle": "Photo paper prints Color Wall Accents (deep orange, bright green, sky blue, midtone orange); bran...", "print:hasFinishedQuantity": 12, "print:hasFinishedSizeHeight": 1, "print:hasFinishedSizeWidth": 1, "print:hasFscPaperBeenSpecified": "No", "print:hasInternalID": "140d4809-bbf1-4b5f-b341-1b7501243aad", "print:hasMaterialCategory": "Plastic", "print:hasMaterialDescription": "Vinyl", "print:hasMaterialRecycledPercentage": "0%", "print:hasMaterialThicknessOrWeight": 1, "print:hasMaterialType": "PVC", "print:hasMaterialUnitOfMeasure": "Pounds (lbs)", "print:hasNumberOfVersions": 12, "print:hasPermutationID": "d7ae357a-fcee-48c5-bc04-2a2270c264bb", "print:hasPrice": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "122.4"}, "print:hasPrintedSides": "Single sided", "print:hasProductCategory": "Indoor/Outdoor Signage", "print:hasProofType": "PDF digital proof", "print:hasQuantity": 12, "print:hasRecycledContentBeenOffered": "No", "print:hasSupplierName": "Firehouse Image Center(Firehouse Image Center - 12168 - HHGSP)", "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches (in)"}' - text: '{"@id": "hub:8671421678e8439f9c4c15fe6dcd5342", 
"@type": "print:PrintRecord", "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-06-03"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": 39317, "print:hasCustomerName": "Gannett, Inc(Consumer Marketing Services)", "print:hasCutting": "Trim to size", "print:hasElementID": 3325140, "print:hasElementTitle": "with remit", "print:hasFinishedQuantity": 17971, "print:hasFscPaperBeenSpecified": "No", "print:hasInternalID": "cbf2a0fd-4443-47b7-90cd-79e268477cd4", "print:hasMaterialCategory": "Paper", "print:hasMaterialDescription": "Paper", "print:hasMaterialType": "Paper", "print:hasMaterialUnitOfMeasure": "Pounds (lbs)", "print:hasPermutationID": "86714216-78e8-439f-9c4c-15fe6dcd5342", "print:hasPrice": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "7500.4"}, "print:hasPrintedSides": "Double sided", "print:hasProofType": "No proof required", "print:hasQuantity": 17971, "print:hasRecycledContentBeenOffered": "N/A", "print:hasSupplierName": "TrueSense Marketing Inc(TrueSense Marketing Inc - HHGSP - PI - ISR)", "print:hasUnitOfMeasure": "Inches (in)"}' - text: '{"@id": "hub:5c18ef55967a43a281321467fe3452f4", "@type": "print:PrintRecord", "extraction:hasActivity": {"@id": "extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9"}, "print:TotalColoursDoubleSidedSame": 4, "print:coloursToFaceAndReverseAreSame": "Yes", "print:hasColourDetails": "4 color process", "print:hasCreatedDate": {"@type": "http://www.w3.org/2001/XMLSchema#date", "@value": "2024-05-14"}, "print:hasCurrencyCode": "USD", "print:hasCustomerHomeCountry": "United States", "print:hasCustomerID": 30642, "print:hasCustomerName": "Station Casinos LLC(Station Casinos)", "print:hasCutting": "Trim to size", "print:hasElementID": 3286957, "print:hasElementTitle": "208101 - 10\" x 10\" DS Tile - NO VELCRO", 
"print:hasFinishedQuantity": 10, "print:hasFinishedSizeHeight": 10, "print:hasFinishedSizeWidth": 10, "print:hasFscPaperBeenSpecified": "No", "print:hasHandFinishing": "Yes", "print:hasInternalID": "5439740f-0593-484a-8f03-cd67e125d36a", "print:hasMaterialCategory": "Other", "print:hasMaterialDescription": "3/16\" W/W/W Foamcore", "print:hasMaterialType": "Other", "print:hasNumberOfVersions": 1, "print:hasPackingRequirements": "DELIVER TO PALACE", "print:hasPermutationID": "5c18ef55-967a-43a2-8132-1467fe3452f4", "print:hasPrice": {"@type": "http://www.w3.org/2001/XMLSchema#decimal", "@value": "26.0"}, "print:hasPrintedSides": "Double sided", "print:hasProofType": "No proof required", "print:hasQuantity": 10, "print:hasRecycledContentBeenOffered": "No", "print:hasSupplierName": "Quick Change Display(Quick Change Display - HHGSP)", "print:hasTotalColours": 4, "print:hasUnitOfMeasure": "Inches (in)"}' metrics: - accuracy pipeline_tag: text-classification library_name: setfit inference: true base_model: intfloat/e5-base --- # SetFit with intfloat/e5-base This is a [SetFit](https://github.com/huggingface/setfit) model that can be used for Text Classification. This SetFit model uses [intfloat/e5-base](https://huggingface.co/intfloat/e5-base) as the Sentence Transformer embedding model. A [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance is used for classification. The model has been trained using an efficient few-shot learning technique that involves: 1. Fine-tuning a [Sentence Transformer](https://www.sbert.net) with contrastive learning. 2. Training a classification head with features from the fine-tuned Sentence Transformer. 
## Model Details ### Model Description - **Model Type:** SetFit - **Sentence Transformer body:** [intfloat/e5-base](https://huggingface.co/intfloat/e5-base) - **Classification head:** a [LogisticRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html) instance - **Maximum Sequence Length:** 512 tokens - **Number of Classes:** 37 classes ### Model Sources - **Repository:** [SetFit on GitHub](https://github.com/huggingface/setfit) - **Paper:** [Efficient Few-Shot Learning Without Prompts](https://arxiv.org/abs/2209.11055) - **Blogpost:** [SetFit: Efficient Few-Shot Learning Without Prompts](https://huggingface.co/blog/setfit) ### Model Labels | Label | Examples | |:------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | 1 | | | 2 | | | 3 | | | 0 | | | 4 | | | 5 | | | 6 | | | 7 | | | 8 | | | 9 | | | 10 | | | 11 | | | 12 | | | 13 | | | 14 | | | 15 | | | 16 | | | 17 | | | 18 | | | 19 | | | 20 | | | 21 | | | 22 | | | 23 | | | 24 | | | 25 | | | 26 | | | 27 | | | 28 | | | 29 | | | 30 | | | 31 | | | 32 | | | 33 | | | 34 | | | 35 | | | 36 | | ## Uses ### Direct Use for Inference First install the SetFit library: ```bash pip install setfit ``` Then you can load this model and run inference. 
```python from setfit import SetFitModel # Download from the 🤗 Hub model = SetFitModel.from_pretrained("Northell/ros-classifiers") # Run inference preds = model("{\"@id\": \"hub:8671421678e8439f9c4c15fe6dcd5342\", \"@type\": \"print:PrintRecord\", \"extraction:hasActivity\": {\"@id\": \"extraction:7028d9b4-9daf-4ebb-8218-42c6f30eefa9\"}, \"print:hasCreatedDate\": {\"@type\": \"http://www.w3.org/2001/XMLSchema#date\", \"@value\": \"2024-06-03\"}, \"print:hasCurrencyCode\": \"USD\", \"print:hasCustomerHomeCountry\": \"United States\", \"print:hasCustomerID\": 39317, \"print:hasCustomerName\": \"Gannett, Inc(Consumer Marketing Services)\", \"print:hasCutting\": \"Trim to size\", \"print:hasElementID\": 3325140, \"print:hasElementTitle\": \"with remit\", \"print:hasFinishedQuantity\": 17971, \"print:hasFscPaperBeenSpecified\": \"No\", \"print:hasInternalID\": \"cbf2a0fd-4443-47b7-90cd-79e268477cd4\", \"print:hasMaterialCategory\": \"Paper\", \"print:hasMaterialDescription\": \"Paper\", \"print:hasMaterialType\": \"Paper\", \"print:hasMaterialUnitOfMeasure\": \"Pounds (lbs)\", \"print:hasPermutationID\": \"86714216-78e8-439f-9c4c-15fe6dcd5342\", \"print:hasPrice\": {\"@type\": \"http://www.w3.org/2001/XMLSchema#decimal\", \"@value\": \"7500.4\"}, \"print:hasPrintedSides\": \"Double sided\", \"print:hasProofType\": \"No proof required\", \"print:hasQuantity\": 17971, \"print:hasRecycledContentBeenOffered\": \"N/A\", \"print:hasSupplierName\": \"TrueSense Marketing Inc(TrueSense Marketing Inc - HHGSP - PI - ISR)\", \"print:hasUnitOfMeasure\": \"Inches (in)\"}") ``` ## Training Details ### Training Set Metrics | Training set | Min | Median | Max | |:-------------|:----|:---------|:----| | Word count | 81 | 133.9019 | 289 | | Label | Training Sample Count | |:------|:----------------------| | 0 | 110 | | 1 | 1 | | 2 | 39 | | 3 | 1 | | 4 | 3 | | 5 | 2 | | 6 | 7 | | 7 | 2 | | 8 | 1 | | 9 | 1 | | 10 | 2 | | 11 | 4 | | 12 | 8 | | 13 | 1 | | 14 | 2 | | 15 | 3 | | 16 | 1 | | 17 | 
1 | | 18 | 1 | | 19 | 1 | | 20 | 1 | | 21 | 1 | | 22 | 1 | | 23 | 1 | | 24 | 7 | | 25 | 1 | | 26 | 1 | | 27 | 1 | | 28 | 1 | | 29 | 1 | | 30 | 1 | | 31 | 1 | | 32 | 1 | | 33 | 1 | | 34 | 1 | | 35 | 1 | | 36 | 1 | ### Training Hyperparameters - batch_size: (16, 2) - num_epochs: (1, 64) - max_steps: -1 - sampling_strategy: undersampling - body_learning_rate: (2e-05, 1e-05) - head_learning_rate: 0.01 - loss: CosineSimilarityLoss - distance_metric: cosine_distance - margin: 0.25 - end_to_end: False - use_amp: False - warmup_proportion: 0.1 - l2_weight: 0.01 - seed: 42 - eval_max_steps: -1 - load_best_model_at_end: False ### Training Results | Epoch | Step | Training Loss | Validation Loss | |:------:|:----:|:-------------:|:---------------:| | 0.0011 | 1 | 0.2809 | - | | 0.0568 | 50 | 0.3179 | - | | 0.1136 | 100 | 0.1994 | - | | 0.1705 | 150 | 0.1105 | - | | 0.2273 | 200 | 0.0543 | - | | 0.2841 | 250 | 0.0416 | - | | 0.3409 | 300 | 0.0398 | - | | 0.3977 | 350 | 0.0252 | - | | 0.4545 | 400 | 0.0328 | - | | 0.5114 | 450 | 0.0211 | - | | 0.5682 | 500 | 0.0202 | - | | 0.625 | 550 | 0.0139 | - | | 0.6818 | 600 | 0.0132 | - | | 0.7386 | 650 | 0.0105 | - | | 0.7955 | 700 | 0.0074 | - | | 0.8523 | 750 | 0.01 | - | | 0.9091 | 800 | 0.0104 | - | | 0.9659 | 850 | 0.0193 | - | ### Framework Versions - Python: 3.10.12 - SetFit: 1.1.2 - Sentence Transformers: 4.1.0 - Transformers: 4.52.4 - PyTorch: 2.7.1+cu126 - Datasets: 3.4.1 - Tokenizers: 0.21.1 ## Citation ### BibTeX ```bibtex @article{https://doi.org/10.48550/arxiv.2209.11055, doi = {10.48550/ARXIV.2209.11055}, url = {https://arxiv.org/abs/2209.11055}, author = {Tunstall, Lewis and Reimers, Nils and Jo, Unso Eun Seo and Bates, Luke and Korat, Daniel and Wasserblat, Moshe and Pereg, Oren}, keywords = {Computation and Language (cs.CL), FOS: Computer and information sciences}, title = {Efficient Few-Shot Learning Without Prompts}, publisher = {arXiv}, year = {2022}, copyright = {Creative
Commons Attribution 4.0 International} } ```