-
Notifications
You must be signed in to change notification settings - Fork 304
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add IFE task page and edit FE task page (#559)
Co-authored-by: Omar Sanseviero <[email protected]> Co-authored-by: Pedro Cuenca <[email protected]>
- Loading branch information
1 parent
8865043
commit e16e823
Showing
5 changed files
with
79 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23 changes: 23 additions & 0 deletions
23
packages/tasks/src/tasks/image-feature-extraction/about.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
## Use Cases | ||
|
||
### Transfer Learning | ||
|
||
Models trained on a specific dataset can learn features about the data. For instance, a model trained on a car classification dataset learns to recognize edges and curves on a very high level and car-specific features on a low level. This information can be transferred to a new model that is going to be trained on classifying trucks. This process of extracting features and transferring to another model is called transfer learning. | ||
|
||
### Similarity | ||
|
||
Features extracted from models contain semantically meaningful information about the world. These features can be used to detect the similarity between two images. Assume there are two images: a photo of a stray cat in a street setting and a photo of a cat at home. These images both contain cats, and the features will contain the information that there's a cat in the image. Thus, comparing the features of a stray cat photo to the features of a domestic cat photo will result in higher similarity compared to any other image that doesn't contain any cats. | ||
|
||
## Inference | ||
|
||
```python | ||
import torch | ||
from transformers import pipeline | ||
|
||
pipe = pipeline(task="image-feature-extraction", model="google/vit-base-patch16-384", framework="pt", pool=True) | ||
pipe("https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/cats.png") | ||
|
||
# The pipeline returns the pooled feature vector for the image: | ||
|
||
'[[[0.21236686408519745, 1.0919708013534546, 0.8512550592422485, ...]]]' | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import type { TaskDataCustom } from ".."; | ||
|
||
const taskData: TaskDataCustom = { | ||
datasets: [ | ||
{ | ||
description: | ||
"ImageNet-1K is a image classification dataset in which images are used to train image-feature-extraction models.", | ||
id: "imagenet-1k", | ||
}, | ||
], | ||
demo: { | ||
inputs: [ | ||
{ | ||
filename: "mask-generation-input.png", | ||
type: "img", | ||
}, | ||
], | ||
outputs: [ | ||
{ | ||
table: [ | ||
["Dimension 1", "Dimension 2", "Dimension 3"], | ||
["0.21236686408519745", "1.0919708013534546", "0.8512550592422485"], | ||
["0.809657871723175", "-0.18544459342956543", "-0.7851548194885254"], | ||
["1.3103108406066895", "-0.2479034662246704", "-0.9107287526130676"], | ||
["1.8536205291748047", "-0.36419737339019775", "0.09717650711536407"], | ||
], | ||
type: "tabular", | ||
}, | ||
], | ||
}, | ||
metrics: [], | ||
models: [ | ||
{ | ||
description: "A powerful image feature extraction model.", | ||
id: "timm/vit_large_patch14_dinov2.lvd142m", | ||
}, | ||
{ | ||
description: "A strong image feature extraction model.", | ||
id: "google/vit-base-patch16-224-in21k", | ||
}, | ||
{ | ||
description: "A robust image feature extraction models.", | ||
id: "facebook/dino-vitb16", | ||
}, | ||
], | ||
spaces: [], | ||
summary: "Image feature extraction is the task of extracting features learnt in a computer vision model.", | ||
widgetModels: [], | ||
}; | ||
|
||
export default taskData; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters