| | Corpus statistics | | | Label distributions | | | |
---|---|---|---|---|---|---|---|
| | #docs | #tokens | #types | #technology | culture | science | |
ted | |||||||
ar | 1,112 | 1,066,754 | 15,124 | 384 | 304 | 290 | |
de | 1,063 | 774,734 | 19,826 | 364 | 289 | 276 | |
es | 1,152 | 933,376 | 13,088 | 401 | 312 | 295 | |
ru | 1,010 | 831,873 | 17,020 | 346 | 275 | 261 | |
zh | 1,123 | 1,032,708 | 19,594 | 386 | 315 | 290 | |
gv (HighLan) | |||||||
ar | 2,000 | 325,879 | 13,072 | 510 | 489 | 33 | |
de | 1,481 | 269,470 | 16,031 | 346 | 344 | 42 | |
es | 2,000 | 367,631 | 11,104 | 457 | 387 | 38 | |
ru | 2,000 | 488,878 | 16,157 | 516 | 369 | 62 | |
zh | 2,000 | 528,370 | 18,194 | 499 | 366 | 56 | |
gv (LowLan) | |||||||
am | 39 | 10,589 | 4,047 | 3 | 3 | 1 | |
ay | 674 | 66,076 | 4,939 | 76 | 100 | 46 | |
mk | 1,992 | 388,713 | 29,022 | 343 | 426 | 182 | |
sw | 1,383 | 359,066 | 14,072 | 137 | 110 | 71 | |
tl | 254 | 26,072 | 6,138 | 32 | 67 | 19 |
| | Corpus statistics | | | Label distributions | | | |
---|---|---|---|---|---|---|---|
| | #docs | #tokens | #types | #technology | culture | science | |
ted | |||||||
ar | 1,112 | 1,066,754 | 15,124 | 384 | 304 | 290 | |
de | 1,063 | 774,734 | 19,826 | 364 | 289 | 276 | |
es | 1,152 | 933,376 | 13,088 | 401 | 312 | 295 | |
ru | 1,010 | 831,873 | 17,020 | 346 | 275 | 261 | |
zh | 1,123 | 1,032,708 | 19,594 | 386 | 315 | 290 | |
gv (HighLan) | |||||||
ar | 2,000 | 325,879 | 13,072 | 510 | 489 | 33 | |
de | 1,481 | 269,470 | 16,031 | 346 | 344 | 42 | |
es | 2,000 | 367,631 | 11,104 | 457 | 387 | 38 | |
ru | 2,000 | 488,878 | 16,157 | 516 | 369 | 62 | |
zh | 2,000 | 528,370 | 18,194 | 499 | 366 | 56 | |
gv (LowLan) | |||||||
am | 39 | 10,589 | 4,047 | 3 | 3 | 1 | |
ay | 674 | 66,076 | 4,939 | 76 | 100 | 46 | |
mk | 1,992 | 388,713 | 29,022 | 343 | 426 | 182 | |
sw | 1,383 | 359,066 | 14,072 | 137 | 110 | 71 | |
tl | 254 | 26,072 | 6,138 | 32 | 67 | 19 |