Cambios
El 10 de octubre de 2025 a las 7:19:38 UTC,
-
Añadido el recurso «Whistlerlib: a distributed computing library for exploratory data analysis on large social network datasets» al conjunto de datos «Whistlerlib: a distributed computing library for exploratory data analysis on large social network datasets»
f | 1 | { | f | 1 | { |
2 | "author": "A Garcia-Robledo, A Espejel-Trujillo", | 2 | "author": "A Garcia-Robledo, A Espejel-Trujillo", | ||
3 | "author_email": null, | 3 | "author_email": null, | ||
4 | "creator_user_id": "a3da3ec9-3fd4-47a4-8d04-0a90b09614e0", | 4 | "creator_user_id": "a3da3ec9-3fd4-47a4-8d04-0a90b09614e0", | ||
5 | "extras": [ | 5 | "extras": [ | ||
6 | { | 6 | { | ||
7 | "key": "Publicaci\u00f3n", | 7 | "key": "Publicaci\u00f3n", | ||
8 | "value": "Revista" | 8 | "value": "Revista" | ||
9 | }, | 9 | }, | ||
10 | { | 10 | { | ||
11 | "key": "Tipo", | 11 | "key": "Tipo", | ||
12 | "value": "Publicaci\u00f3n" | 12 | "value": "Publicaci\u00f3n" | ||
13 | } | 13 | } | ||
14 | ], | 14 | ], | ||
15 | "groups": [ | 15 | "groups": [ | ||
16 | { | 16 | { | ||
17 | "description": "Este grupo integra las publicaciones | 17 | "description": "Este grupo integra las publicaciones | ||
18 | acad\u00e9micas derivadas de los proyectos de investigaci\u00f3n del | 18 | acad\u00e9micas derivadas de los proyectos de investigaci\u00f3n del | ||
19 | Observatorio Metropolitano CentroGeo. Incluye art\u00edculos | 19 | Observatorio Metropolitano CentroGeo. Incluye art\u00edculos | ||
20 | presentados en congresos nacionales e internacionales, manuscritos en | 20 | presentados en congresos nacionales e internacionales, manuscritos en | ||
21 | formato preprint, cap\u00edtulos de libro y trabajos publicados en | 21 | formato preprint, cap\u00edtulos de libro y trabajos publicados en | ||
22 | revistas cient\u00edficas especializadas. Estos materiales reflejan la | 22 | revistas cient\u00edficas especializadas. Estos materiales reflejan la | ||
23 | labor de investigaci\u00f3n, desarrollo metodol\u00f3gico y | 23 | labor de investigaci\u00f3n, desarrollo metodol\u00f3gico y | ||
24 | an\u00e1lisis territorial del observatorio, contribuyendo al avance | 24 | an\u00e1lisis territorial del observatorio, contribuyendo al avance | ||
25 | del conocimiento en temas urbanos, metropolitanos y geoespaciales.", | 25 | del conocimiento en temas urbanos, metropolitanos y geoespaciales.", | ||
26 | "display_name": "Publicaciones", | 26 | "display_name": "Publicaciones", | ||
27 | "id": "a15a6b77-ddf5-4594-acab-7e772938a5b0", | 27 | "id": "a15a6b77-ddf5-4594-acab-7e772938a5b0", | ||
28 | "image_display_url": "", | 28 | "image_display_url": "", | ||
29 | "name": "publicaciones", | 29 | "name": "publicaciones", | ||
30 | "title": "Publicaciones" | 30 | "title": "Publicaciones" | ||
31 | } | 31 | } | ||
32 | ], | 32 | ], | ||
33 | "id": "b3a9f0bd-728b-4ee5-8a79-24c1069b2785", | 33 | "id": "b3a9f0bd-728b-4ee5-8a79-24c1069b2785", | ||
34 | "isopen": false, | 34 | "isopen": false, | ||
35 | "license_id": null, | 35 | "license_id": null, | ||
36 | "license_title": null, | 36 | "license_title": null, | ||
37 | "maintainer": null, | 37 | "maintainer": null, | ||
38 | "maintainer_email": null, | 38 | "maintainer_email": null, | ||
39 | "metadata_created": "2025-10-10T07:19:37.595707", | 39 | "metadata_created": "2025-10-10T07:19:37.595707", | ||
n | 40 | "metadata_modified": "2025-10-10T07:19:37.595714", | n | 40 | "metadata_modified": "2025-10-10T07:19:38.110278", |
41 | "name": | 41 | "name": | ||
42 | ing-library-for-exploratory-data-analysis-on-large-soci-f5be08e69cac", | 42 | ing-library-for-exploratory-data-analysis-on-large-soci-f5be08e69cac", | ||
43 | "notes": "At least 350k posts are published on X, 510k comments are | 43 | "notes": "At least 350k posts are published on X, 510k comments are | ||
44 | posted on Facebook, and 66k pictures and videos are shared on | 44 | posted on Facebook, and 66k pictures and videos are shared on | ||
45 | Instagram each minute. These large datasets require substantial | 45 | Instagram each minute. These large datasets require substantial | ||
46 | processing power, even if only a percentage is collected for analysis | 46 | processing power, even if only a percentage is collected for analysis | ||
47 | and research. To face this challenge, data scientists can now use | 47 | and research. To face this challenge, data scientists can now use | ||
48 | computer clusters deployed on various IaaS and PaaS services in the | 48 | computer clusters deployed on various IaaS and PaaS services in the | ||
49 | cloud. However, scientists still have to master the design of | 49 | cloud. However, scientists still have to master the design of | ||
50 | distributed algorithms and be familiar with using distributed | 50 | distributed algorithms and be familiar with using distributed | ||
51 | computing programming frameworks. It is thus essential to generate | 51 | computing programming frameworks. It is thus essential to generate | ||
52 | tools that provide analysis methods to leverage the advantages of | 52 | tools that provide analysis methods to leverage the advantages of | ||
53 | computer clusters for processing large amounts of social network text. | 53 | computer clusters for processing large amounts of social network text. | ||
54 | This paper presents Whistlerlib, a new Python library for conducting | 54 | This paper presents Whistlerlib, a new Python library for conducting | ||
55 | exploratory analysis on large text datasets on social networks. | 55 | exploratory analysis on large text datasets on social networks. | ||
56 | Whistlerlib implements distributed versions of various social media, | 56 | Whistlerlib implements distributed versions of various social media, | ||
57 | sentiment, and social network analysis methods that can run atop | 57 | sentiment, and social network analysis methods that can run atop | ||
58 | computer clusters. We experimentally demonstrate the scalability of | 58 | computer clusters. We experimentally demonstrate the scalability of | ||
59 | the various Whistlerlib distributed methods when deployed on a public | 59 | the various Whistlerlib distributed methods when deployed on a public | ||
60 | cloud platform. We also present a practical example of the analysis of | 60 | cloud platform. We also present a practical example of the analysis of | ||
61 | posts on the social network X about the Mexico City subway to showcase | 61 | posts on the social network X about the Mexico City subway to showcase | ||
62 | the features of Whistlerlib in scenarios where social network analysis | 62 | the features of Whistlerlib in scenarios where social network analysis | ||
63 | tools are needed to address issues with a social dimension.", | 63 | tools are needed to address issues with a social dimension.", | ||
n | 64 | "num_resources": 0, | n | 64 | "num_resources": 1, |
65 | "num_tags": 9, | 65 | "num_tags": 9, | ||
66 | "organization": { | 66 | "organization": { | ||
67 | "approval_status": "approved", | 67 | "approval_status": "approved", | ||
68 | "created": "2022-05-19T00:10:30.480393", | 68 | "created": "2022-05-19T00:10:30.480393", | ||
69 | "description": "Observatorio Metropolitano CentroGeo", | 69 | "description": "Observatorio Metropolitano CentroGeo", | ||
70 | "id": "b3b3a79d-748a-4464-9471-732b6c74ec53", | 70 | "id": "b3b3a79d-748a-4464-9471-732b6c74ec53", | ||
71 | "image_url": | 71 | "image_url": | ||
72 | "2022-05-19-001030.456616FullColor1280x1024LogoOnly.png", | 72 | "2022-05-19-001030.456616FullColor1280x1024LogoOnly.png", | ||
73 | "is_organization": true, | 73 | "is_organization": true, | ||
74 | "name": "observatorio-metropolitano-centrogeo", | 74 | "name": "observatorio-metropolitano-centrogeo", | ||
75 | "state": "active", | 75 | "state": "active", | ||
76 | "title": "Observatorio Metropolitano CentroGeo", | 76 | "title": "Observatorio Metropolitano CentroGeo", | ||
77 | "type": "organization" | 77 | "type": "organization" | ||
78 | }, | 78 | }, | ||
79 | "owner_org": "b3b3a79d-748a-4464-9471-732b6c74ec53", | 79 | "owner_org": "b3b3a79d-748a-4464-9471-732b6c74ec53", | ||
80 | "private": false, | 80 | "private": false, | ||
81 | "relationships_as_object": [], | 81 | "relationships_as_object": [], | ||
82 | "relationships_as_subject": [], | 82 | "relationships_as_subject": [], | ||
t | 83 | "resources": [], | t | 83 | "resources": [ |
84 | { | ||||
85 | "cache_last_updated": null, | ||||
86 | "cache_url": null, | ||||
87 | "created": "2025-10-10T07:19:38.138694", | ||||
88 | "datastore_active": false, | ||||
89 | "description": "At least 350k posts are published on X, 510k | ||||
90 | comments are posted on Facebook, and 66k pictures and videos are | ||||
91 | shared on Instagram each minute. These large datasets require | ||||
92 | substantial processing power, even if only a percentage is collected | ||||
93 | for analysis and research. To face this challenge, data scientists can | ||||
94 | now use computer clusters deployed on various IaaS and PaaS services | ||||
95 | in the cloud. However, scientists still have to master the design of | ||||
96 | distributed algorithms and be familiar with using distributed | ||||
97 | computing programming frameworks. It is thus essential to generate | ||||
98 | tools that provide analysis methods to leverage the advantages of | ||||
99 | computer clusters for processing large amounts of social network text. | ||||
100 | This paper presents Whistlerlib, a new Python library for conducting | ||||
101 | exploratory analysis on large text datasets on social networks. | ||||
102 | Whistlerlib implements distributed versions of various social media, | ||||
103 | sentiment, and social network analysis methods that can run atop | ||||
104 | computer clusters. We experimentally demonstrate the scalability of | ||||
105 | the various Whistlerlib distributed methods when deployed on a public | ||||
106 | cloud platform. We also present a practical example of the analysis of | ||||
107 | posts on the social network X about the Mexico City subway to showcase | ||||
108 | the features of Whistlerlib in scenarios where social network analysis | ||||
109 | tools are needed to address issues with a social dimension.", | ||||
110 | "format": "HTML", | ||||
111 | "hash": "", | ||||
112 | "id": "298aa89e-8339-4dd5-95e7-5122f47ae39c", | ||||
113 | "last_modified": null, | ||||
114 | "metadata_modified": "2025-10-10T07:19:38.115157", | ||||
115 | "mimetype": null, | ||||
116 | "mimetype_inner": null, | ||||
117 | "name": "Whistlerlib: a distributed computing library for | ||||
118 | exploratory data analysis on large social network datasets", | ||||
119 | "package_id": "b3a9f0bd-728b-4ee5-8a79-24c1069b2785", | ||||
120 | "position": 0, | ||||
121 | "resource_type": null, | ||||
122 | "size": null, | ||||
123 | "state": "active", | ||||
124 | "url": "https://doi.org/10.1007/s11042-024-19827-z", | ||||
125 | "url_type": null | ||||
126 | } | ||||
127 | ], | ||||
84 | "state": "active", | 128 | "state": "active", | ||
85 | "tags": [ | 129 | "tags": [ | ||
86 | { | 130 | { | ||
87 | "display_name": "computer-science", | 131 | "display_name": "computer-science", | ||
88 | "id": "29cae056-cd7e-43f7-be5b-b25869a3fbf2", | 132 | "id": "29cae056-cd7e-43f7-be5b-b25869a3fbf2", | ||
89 | "name": "computer-science", | 133 | "name": "computer-science", | ||
90 | "state": "active", | 134 | "state": "active", | ||
91 | "vocabulary_id": null | 135 | "vocabulary_id": null | ||
92 | }, | 136 | }, | ||
93 | { | 137 | { | ||
94 | "display_name": "data-mining", | 138 | "display_name": "data-mining", | ||
95 | "id": "bc92b940-6ae7-4005-9885-39c0bf8e2aa7", | 139 | "id": "bc92b940-6ae7-4005-9885-39c0bf8e2aa7", | ||
96 | "name": "data-mining", | 140 | "name": "data-mining", | ||
97 | "state": "active", | 141 | "state": "active", | ||
98 | "vocabulary_id": null | 142 | "vocabulary_id": null | ||
99 | }, | 143 | }, | ||
100 | { | 144 | { | ||
101 | "display_name": "data-science", | 145 | "display_name": "data-science", | ||
102 | "id": "468c4e82-ef0a-4119-b7cb-a3882df895bf", | 146 | "id": "468c4e82-ef0a-4119-b7cb-a3882df895bf", | ||
103 | "name": "data-science", | 147 | "name": "data-science", | ||
104 | "state": "active", | 148 | "state": "active", | ||
105 | "vocabulary_id": null | 149 | "vocabulary_id": null | ||
106 | }, | 150 | }, | ||
107 | { | 151 | { | ||
108 | "display_name": "exploratory-analysis", | 152 | "display_name": "exploratory-analysis", | ||
109 | "id": "5888ef9a-51e0-4ab2-b88c-6bf06a428de0", | 153 | "id": "5888ef9a-51e0-4ab2-b88c-6bf06a428de0", | ||
110 | "name": "exploratory-analysis", | 154 | "name": "exploratory-analysis", | ||
111 | "state": "active", | 155 | "state": "active", | ||
112 | "vocabulary_id": null | 156 | "vocabulary_id": null | ||
113 | }, | 157 | }, | ||
114 | { | 158 | { | ||
115 | "display_name": "exploratory-data-analysis", | 159 | "display_name": "exploratory-data-analysis", | ||
116 | "id": "65dfe5fc-3c1b-49fc-949f-8bd5521c8061", | 160 | "id": "65dfe5fc-3c1b-49fc-949f-8bd5521c8061", | ||
117 | "name": "exploratory-data-analysis", | 161 | "name": "exploratory-data-analysis", | ||
118 | "state": "active", | 162 | "state": "active", | ||
119 | "vocabulary_id": null | 163 | "vocabulary_id": null | ||
120 | }, | 164 | }, | ||
121 | { | 165 | { | ||
122 | "display_name": "information-retrieval", | 166 | "display_name": "information-retrieval", | ||
123 | "id": "7c59b49e-d59b-48d5-9338-bece653ad9bc", | 167 | "id": "7c59b49e-d59b-48d5-9338-bece653ad9bc", | ||
124 | "name": "information-retrieval", | 168 | "name": "information-retrieval", | ||
125 | "state": "active", | 169 | "state": "active", | ||
126 | "vocabulary_id": null | 170 | "vocabulary_id": null | ||
127 | }, | 171 | }, | ||
128 | { | 172 | { | ||
129 | "display_name": "social-network-analysis", | 173 | "display_name": "social-network-analysis", | ||
130 | "id": "3c409d4b-6f3f-47e6-83a3-b4703c5421a7", | 174 | "id": "3c409d4b-6f3f-47e6-83a3-b4703c5421a7", | ||
131 | "name": "social-network-analysis", | 175 | "name": "social-network-analysis", | ||
132 | "state": "active", | 176 | "state": "active", | ||
133 | "vocabulary_id": null | 177 | "vocabulary_id": null | ||
134 | }, | 178 | }, | ||
135 | { | 179 | { | ||
136 | "display_name": "social-network-sociolinguistics", | 180 | "display_name": "social-network-sociolinguistics", | ||
137 | "id": "61ad3187-6c76-4938-a68d-67394dea4779", | 181 | "id": "61ad3187-6c76-4938-a68d-67394dea4779", | ||
138 | "name": "social-network-sociolinguistics", | 182 | "name": "social-network-sociolinguistics", | ||
139 | "state": "active", | 183 | "state": "active", | ||
140 | "vocabulary_id": null | 184 | "vocabulary_id": null | ||
141 | }, | 185 | }, | ||
142 | { | 186 | { | ||
143 | "display_name": "world-wide-web", | 187 | "display_name": "world-wide-web", | ||
144 | "id": "f1c3da43-fac0-460a-818b-eea11705e7ff", | 188 | "id": "f1c3da43-fac0-460a-818b-eea11705e7ff", | ||
145 | "name": "world-wide-web", | 189 | "name": "world-wide-web", | ||
146 | "state": "active", | 190 | "state": "active", | ||
147 | "vocabulary_id": null | 191 | "vocabulary_id": null | ||
148 | } | 192 | } | ||
149 | ], | 193 | ], | ||
150 | "title": "Whistlerlib: a distributed computing library for | 194 | "title": "Whistlerlib: a distributed computing library for | ||
151 | exploratory data analysis on large social network datasets", | 195 | exploratory data analysis on large social network datasets", | ||
152 | "type": "dataset", | 196 | "type": "dataset", | ||
153 | "url": "https://doi.org/10.1007/s11042-024-19827-z", | 197 | "url": "https://doi.org/10.1007/s11042-024-19827-z", | ||
154 | "version": null | 198 | "version": null | ||
155 | } | 199 | } |